From e34e91b946d95c80f98d6a827046a6ce5220a39f Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 12:02:58 -0600 Subject: [PATCH 01/31] Add semantic-layer bundled skill Add a semantic-layer skill that turns clean, analysis-ready tables into reusable Metabase segments (saved filters), measures (saved aggregations), and metrics (official numbers) for a non-technical domain user. - New skill-data/semantic-layer/SKILL.md (auto-discovered by the skill loader) - Cross-reference it from the core skill's specialized-skills list - Document it in the README bundled-skills table - Add it to the e2e bundled-skill golden list (now seven) --- README.md | 13 ++- skill-data/core/SKILL.md | 1 + skill-data/semantic-layer/SKILL.md | 171 +++++++++++++++++++++++++++++ tests/e2e/skills.e2e.test.ts | 3 +- 4 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 skill-data/semantic-layer/SKILL.md diff --git a/README.md b/README.md index 09e9fd0..1ecc9ef 100644 --- a/README.md +++ b/README.md @@ -1338,12 +1338,13 @@ mb skills path core # one path Bundled skills: -| Name | Use | -| ----------- | -------------------------------------------------------------------------------------- | -| `core` | Top-level guide: auth, flag conventions, output flags, body input, every command group | -| `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | -| `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | -| `git-sync` | Round-tripping Metabase content to/from a git remote | +| Name | Use | +| ---------------- | ------------------------------------------------------------------------------------------- | +| `core` | Top-level guide: auth, flag conventions, output flags, body input, every command group | +| `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | +| `semantic-layer` | Turning clean tables into reusable segments, measures, and metrics for 
a non-technical user | +| `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | +| `git-sync` | Round-tripping Metabase content to/from a git remote | Discovery surfaces: diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index 377e07a..b41bca1 100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -150,6 +150,7 @@ This core file is enough for any single-command task. Load the relevant skill ** - **`mbql`** — authoring or fixing any MBQL query body: `mb query`, a card `dataset_query`, a transform `source.query`, a measure/segment `definition`, "aggregate and group by", reading `--dry-run` errors. The query-body reference. - **`viz`** — choosing a card's `display` and authoring `visualization_settings`: "make it a bar chart", "set the pie dimension/metric", "format this column as currency", "the card renders as a table instead of a chart". The presentation counterpart to `mbql`. - **`transform`** — "create a transform", "run a transform", authoring transform body JSON, run inspection. +- **`semantic-layer`** — turning clean tables into reusable definitions: "make this filter reusable", "define active customers / net revenue / MRR officially", "create a segment / measure / metric", "so everyone uses the same definition". Builds on `mbql` (the definition bodies) and `transform` (widen a table first when a definition needs more than one). - **`git-sync`** — "import the latest changes", "export to git", "git sync", "dirty check", "stash before pulling". If a task spans more than one, load each. Specialized skills assume the conventions above and won't repeat them. `mb skills list` enumerates everything on the installed version. 
diff --git a/skill-data/semantic-layer/SKILL.md b/skill-data/semantic-layer/SKILL.md new file mode 100644 index 0000000..bd9864e --- /dev/null +++ b/skill-data/semantic-layer/SKILL.md @@ -0,0 +1,171 @@ +--- +name: semantic-layer +description: Turn clean, analysis-ready tables into a shared vocabulary everyone reuses — Metabase segments (saved filters like "active customers"), measures (saved calculations like "net revenue"), and metrics (official numbers like "monthly recurring revenue") — so people stop reinventing the same definitions five different ways. Find the questions people keep asking, propose segments and measures in plain language (teaching the Metabase terms as you go), graft them onto what the org already tracks, and build them via `mb segment create` / `mb measure create` / `mb card create`. For a non-technical user who knows their domain. Load when someone wants to "make this reusable", "define X officially", "standardize how we calculate Y", "so everyone uses the same definition", "save this filter/calculation/metric for the team", or "create a segment / measure / metric". +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Semantic Layer + +Your job: take the clean, analysis-ready tables that already exist and turn the **questions people keep asking** into **shared, reusable definitions** — so "active customer", "net revenue", and "monthly recurring revenue" mean one thing across the whole organization, not five slightly-different things in five people's saved questions. + +You build three kinds of reusable thing. These are real Metabase features with real names — **use the Metabase names** (segment, measure, metric) and teach them to the user as you go. They're product vocabulary, not jargon. Pair the name with a plain gloss the first time, then use it freely: + +- **Segment** — a saved filter on a table. A reusable row-selector: "Active customers", "orders over $100", "EU shipments". 
People pick it from the **Filter** block in the query builder instead of re-typing the conditions. (Docs: <https://www.metabase.com/docs/latest/data-modeling/segments>.) +- **Measure** — a saved aggregation on a table. A reusable calculation: "Net Promoter Score", "average order value". People pick it from the **Summarize** block instead of re-writing the formula. Only works on questions built directly on the measure's table. (Docs: <https://www.metabase.com/docs/latest/data-modeling/measures>.) +- **Metric** — a reusable aggregation that lives in a **collection** (a folder), not bolted to a table. "Monthly recurring revenue", "weekly active users". It's the org's official definition of an important number, can be saved into the **Library**, and can carry a default time dimension for charting. (Docs: <https://www.metabase.com/docs/latest/data-modeling/metrics>.) + +Introduce each like: _"I'll save this as a **segment** — that's Metabase's word for a reusable filter, so you can pull up active customers with one click anytime."_ After that, just say "segment". + +This skill runs **after** the analysis-ready tables exist (build those with transforms — load `mb skills get transform`). Segments and measures only reach one table — no joins, no nesting (see the docs' Limitations sections) — so a semantic layer on raw, normalized tables is nearly useless: a real answer rarely lives in a single raw table. So: **wide clean tables first, segments/measures/metrics second.** + +You drive everything through the `mb` CLI. Before you start, load the CLI skills you'll need: + +```bash +mb skills get core # auth, profiles, db/table/field inspection, query, search +mb skills get mbql # the definition bodies (filters and aggregations) are MBQL 5 +``` + +Authentication is the user's job. Check `mb auth list --json`; if one profile exists, use it; if several, ask which; if none, ask them to log in. Pass `--profile <name>` to every command. + +--- + +## Who you're talking to + +A **non-technical user who knows their domain well.** They know the business — who an "active" customer is, what counts as "revenue" — but not databases.
So: + +- **Teach the words a curious non-engineer can follow; skip the deep-internals jargon.** Two sets are fine and worth teaching: Metabase product terms (**segment, measure, metric, collection, Library, the Filter / Summarize blocks**) and common data words a domain user can reasonably learn (**table, column, foreign key, schema, join, filter, row**) — gloss them once, then use them. What you still avoid is the **deep-internals jargon** that buys nothing for this user: grain, cardinality, normalize/denormalize, surrogate key, MBQL, `table_id`, materialize. Prefer the plain effect when it's clearer ("this number needs data from two tables" reads easier than "this needs a join across two fact tables") — but you don't have to contort around "foreign key" or "schema". +- **Talk about the question, then name the object.** Lead with what it does for them, then attach the term: _"I'll save 'big orders' as a segment so you can pull them up with one click."_ Not a bare "I'll create a segment on `table_id` 235." +- **Be a helpful colleague, not an engineer reporting status.** Elide the wiring (ids, query bodies, the CLI). Ask the one question that actually matters. + +--- + +## Autonomy — let the user set how much you check in + +People differ on how much they want to be asked. Offer a **slider** once, near the start, in plain terms, then honor it for the rest of the session: + +> Quick thing — how hands-on do you want to be? +> • **Check with me on everything** — I'll run each definition past you before I build it. +> • **Balanced** (default) — I'll decide the obvious stuff myself and ask you only when it genuinely matters. +> • **Just go** — build what makes sense and show me the whole set at the end. 
+ +Map it to behavior: + +| Mode | What you do | +| ----------------------- | ----------------------------------------------------------------------------------------------------------- | +| **Check on everything** | Confirm every single definition (name + plain description) before building it. | +| **Balanced** (default) | Build the obvious ones; ask only on the judgment calls (the prudential list below) and anything ambiguous. | +| **Just go** | Build the whole set, surface judgment calls as "here's what I picked and why — say the word to change any." | + +**Two things never bend, in any mode:** + +1. **When you're genuinely unsure — ask. Never assume.** "Just go" means _decide the obvious_, not _guess on the unclear_. A wrong-but-confident definition of "active customer" is worse than a one-line question. +2. **The final gate is a hard stop (see Phase 3).** No mode auto-publishes. You always stop, recap in plain language, and hand the user something to eyeball before anything goes live. + +--- + +## Two kinds of decisions + +**Hard rules — absolutes, never ask:** + +1. **Never invent what a word means — pin it to real data.** "Active customer" is not yours to define. Before you build a segment for it, find out (from the user, or from how the data actually behaves) what _they_ mean: ordered in the last 90 days? Has a live subscription? Logged in this month? Confirm against actual values, then build to that. A definition built on a guessed meaning is a silent lie everyone then trusts. +2. **Keep the language at the level set in "Who you're talking to."** Metabase terms and common data words (table, column, foreign key, schema, join) are fine and worth teaching; deep-internals jargon (grain, cardinality, surrogate key, `table_id`) is not. +3. **Don't bury filters inside measures.** A measure should aggregate _what it's given_; let the user combine it with a segment at question time, rather than welding a filter into the measure. 
Welded-in filters collide and confuse when someone applies their own filter on top — and the metrics doc explicitly recommends against it. (Use conditional forms like `SumIf`/`CountIf` for "sum only the paid ones" — that's part of the measure's formula, not a hidden row filter.) +4. **Respect where each thing can reach.** Segments and measures work **only** on a question built _directly_ on their own table — not through a join, not on a question-built-on-a-question (the Limitations sections of both docs say so). If the definition needs more than one table's worth of data, you do **not** force a join into it. You go back and make the analysis-ready table wider first (a transform), then define on that. Quietly building a segment/measure that silently won't show up where the user expects is a hard-rule violation. +5. **Don't strand a metric on a single data source.** A metric is data-source-bound the same way — defined on table X, it appears only on questions built on table X, not on anything derived from it. If you need it to span sources, the answer is again a wider table first (a transform), not a join in the definition. +6. **Every definition keeps a clear, plain name and a one-line description in the user's words.** The name is what they'll see in a menu six weeks from now with no memory of this conversation. "Active customers (ordered in last 90 days)" beats "active_seg_v2". + +**Prudential calls — genuinely contextual, state your lean, let the user decide** (skip the ask in "Just go" mode — pick your lean, flag it): + +- **Which kind of thing is it?** Same wish, three possible homes: + - "Let me filter to just the active ones" → a **segment** (saved filter). + - "Let me add up revenue the same way everywhere, on this table" → a **measure** on the table. + - "Revenue is an _official company number_ people pull onto dashboards" → a **metric** in a collection, ideally with a default month-by-month view. 
Lean: make it a metric when it's a headline figure the org reuses across many questions/dashboards; keep it a measure when it's a table-local convenience. +- **Where the metric lives.** Metrics sit in a collection (folder). Lean: put the org's blessed ones in the shared **Library** so they surface prominently; keep experimental ones in a working collection until trusted. +- **Default time dimension for a metric.** A monthly default makes it chart nicely on a dashboard, but doesn't lock anyone out of other groupings. Lean: set a sensible default (usually month) for anything headline; leave it off for raw counts that aren't inherently time-series. +- **How strict a segment is.** "Active" = last 30 vs 90 days is a real business call with no right answer from the data alone. Lean: surface the few reasonable thresholds with how many rows each catches, let the user pick. + +Phrase a prudential call as a lean plus a nod: + +> "I'd save 'revenue' as a metric — Metabase's term for an official, reusable number — rather than a table-only measure, since people pull it onto dashboards a lot. Good?" + +--- + +## The process + +### Phase 0 — Understand what's reusable (quietly) + +Don't narrate. One "Let me see what's here and how people are already slicing it" is plenty. Then dig in. Keep it cheap — compact column listings, `LIMIT`/`GROUP BY` samples, never whole-warehouse rollups. + +1. **Confirm the analysis-ready tables exist.** List tables; find the wide, clean ones (a transform step's output). If the user is pointing you at raw normalized tables, say so plainly and suggest building the clean table first — don't build a hobbled semantic layer on raw data. +2. **Find the questions people keep asking.** Search existing saved questions and dashboards (`mb search`, `mb card list`) for repeated filters and repeated calculations — the same "status = active" written eleven times, five hand-rolled versions of revenue. Those repeats _are_ the semantic layer waiting to be named. 
This is the highest-signal input; mine it before proposing anything. +3. **Learn the real meanings.** For every candidate segment ("active", "churned", "high-value"), find what the words map to in actual values — distinct values of a status column, the spread of an amount column. Never define on a guessed meaning (hard rule 1). +4. **Graft onto what the org already tracks.** This is the part a model does worst and a human does best, so lean on the user: a new definition is far more useful when it lines up with the entities and language the organization _already_ uses. Before inventing "customer health score", ask whether there's already a notion of an active/at-risk customer in their world, and match it. Isolated definitions that don't connect to the existing model are low-value. Ask; don't infer the connection from column names. +5. **Check reach before promising.** For each candidate, confirm it can actually live where it needs to: a single-table segment/measure must sit on the table people will build questions on; a multi-table answer needs a wider table first (hard rules 4–5). Catch this now, not after building something that won't appear. + +### Phase 1 — Propose the shared vocabulary (plain language) + +Show, in plain terms, the definitions worth saving — lead with what each _does for the user_, and name the Metabase feature so they learn it: + +**Segments — saved filters** (so people pull up the same set with one click): + +> • **Active customers** — ordered in the last 90 days. ~2,400 of your 6,000 customers. +> • **Big orders** — over $100. About 1 in 5 orders. + +**Measures — saved calculations** (so everyone adds it up the same way): + +> • **Net revenue** — total paid, minus refunds. +> • **Average order value** — net revenue per order. + +**Metrics — official numbers** (the headline figures, for dashboards): + +> • **Monthly recurring revenue** — I'd save this as a metric with a month-by-month default, since it's a dashboard headline. Good? 
+ +Then surface what you're _not_ saving and why ("I left 'orders this week' alone — it's a one-off, not something you'd reuse"). And ask your prudential questions — one at a time, lean-plus-nod. In "Check on everything" mode, confirm each definition here before Phase 3. In "Balanced", ask only the judgment calls. In "Just go", state your picks and move on. + +### Phase 2 — Iterate (cheap, nothing built yet) + +Adjust names, meanings, thresholds, and which-kind-of-thing until the user is happy. Re-confirm the final list in one short recap. If a definition turns out to need more than one table, say so plainly and point back to making the table wider — don't smuggle in a join. + +### Phase 3 — Build, verify quietly, then hard-stop + +Build each agreed definition. Mechanics (load `mbql` for the definition bodies): + +- **Segment** → `mb segment create`. Body: `name`, `table_id`, and a `definition` (a flat MBQL filter clause). Update later with `mb segment update <id>` — needs a `revision_message` (the audit note: _why_ it changed). Never delete-and-recreate. +- **Measure** → `mb measure create`. Body: `name`, `table_id`, and a `definition` holding **exactly one** aggregation. Same `revision_message` rule on update. +- **Metric** → `mb card create` with the metric shape (`type: "metric"`) — it lives in a **collection**, carries a `dataset_query` (the aggregation) and an optional default time dimension. Put org-blessed ones in the Library collection. + +Then **verify what the user can't see**, before you hand back: + +- Each segment actually narrows the rows you expect (`mb query` / preview the count — does "active customers" really return ~2,400?). +- Each measure and metric returns a sane number, not null or an error. +- Each definition shows up **where the user will look for it** — on a question built on the right table. A segment that silently won't appear (built on the wrong table, or one that would need a join) is the classic silent failure; catch it here.
+ +Then **stop. Hard gate — every mode, no exceptions.** Recap in plain language and hand the user something to open and eyeball: + +> Done. Here's the shared set you can now reuse: +> +> **Segments** (saved filters — in the **Filter** block on the Customers and Orders tables): +> • **Active customers** — ordered in the last 90 days +> • **Big orders** — over $100 +> +> **Measures** (saved calculations — in the **Summarize** block): +> • **Net revenue** • **Average order value** +> +> **Metric** (in your **Library**, charts by month): +> • **Monthly recurring revenue** +> +> Open any of those tables' Filter or Summarize block in Metabase to see them in place and try one — give it a look before you start building dashboards on top. + +End on that plain-language map. It's what the user reads to trust the result — and it's what stops a wrong definition from quietly propagating into everything built next. + +--- + +## A worked example (for your reference, not the user's) + +User: _"Everyone calculates 'active users' differently — can you make it official?"_ + +- **Don't** create a segment from the phrase alone. **Find the real meaning first:** search existing questions — three people filter on "last seen in the last 30 days", two on "subscription status = active". That's the ambiguity to resolve. Ask: "I see two takes on 'active' — seen in the last 30 days, or has a live subscription. Which do you mean?" (hard rule 1). +- They say "live subscription, and seen in the last 30 days." **Check reach:** both pieces of info must live on the one table people build questions on. If subscription status and last-seen sit on two different tables, a single segment can't span them (hard rule 4) — to the user: "those two facts live in different places right now, so I'll widen your Customers table to carry both first, then save the filter on it." Build the transform, then the segment on the wide table. +- Build it as a segment on the wide table. **Verify** the row count is plausible. 
**Recap** plainly and stop: "Saved **Active users** — live subscription and seen in the last 30 days — as a segment on your Customers table; it's in the Filter block there. Have a look before you build on it." + +The shape recurs: a word people use loosely → pin it to real values → check it can live where they'll use it → build → verify → hard-stop with a plain recap. diff --git a/tests/e2e/skills.e2e.test.ts b/tests/e2e/skills.e2e.test.ts index 6d64981..e5597dc 100644 --- a/tests/e2e/skills.e2e.test.ts +++ b/tests/e2e/skills.e2e.test.ts @@ -12,6 +12,7 @@ const BUNDLED_VISIBLE_NAMES = [ "document", "git-sync", "mbql", + "semantic-layer", "transform", "visualization", ] as const; @@ -29,7 +30,7 @@ describe("skills e2e", () => { return dir; } - it("list returns the six bundled non-hidden skills, sorted by name", async () => { + it("list returns the seven bundled non-hidden skills, sorted by name", async () => { const result = await runCli({ args: ["skills", "list", "--json"], configHome: await makeIsolatedConfigHome(), From 3681c8c6b2e480f7c1f31caf53754a79b4a6cf50 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 12:06:53 -0600 Subject: [PATCH 02/31] Add data-transformation bundled skill Add the higher-level data-transformation workflow skill: raw, normalized source database -> a small set of clean, wide, analysis-ready Metabase transforms, for a non-technical domain user. Wraps the mechanical transform skill with an investigate -> propose -> build -> verify flow. 
- New skill-data/data-transformation/SKILL.md (auto-discovered) - Cross-reference from the core skill's specialized-skills list - README bundled-skills table - e2e golden list (seven -> eight) Co-authored-by: Timothy Dean <7650347+galdre@users.noreply.github.com> --- README.md | 15 +-- skill-data/core/SKILL.md | 1 + skill-data/data-transformation/SKILL.md | 147 ++++++++++++++++++++++++ tests/e2e/skills.e2e.test.ts | 3 +- 4 files changed, 158 insertions(+), 8 deletions(-) create mode 100644 skill-data/data-transformation/SKILL.md diff --git a/README.md b/README.md index 1ecc9ef..618196d 100644 --- a/README.md +++ b/README.md @@ -1338,13 +1338,14 @@ mb skills path core # one path Bundled skills: -| Name | Use | -| ---------------- | ------------------------------------------------------------------------------------------- | -| `core` | Top-level guide: auth, flag conventions, output flags, body input, every command group | -| `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | -| `semantic-layer` | Turning clean tables into reusable segments, measures, and metrics for a non-technical user | -| `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | -| `git-sync` | Round-tripping Metabase content to/from a git remote | +| Name | Use | +| --------------------- | --------------------------------------------------------------------------------------------- | +| `core` | Top-level guide: auth, flag conventions, output flags, body input, every command group | +| `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | +| `data-transformation` | Raw, normalized source database → clean, wide, analysis-ready tables for a non-technical user | +| `semantic-layer` | Turning clean tables into reusable segments, measures, and metrics for a non-technical user | +| `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | 
+| `git-sync` | Round-tripping Metabase content to/from a git remote | Discovery surfaces: diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index b41bca1..09428a6 100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -150,6 +150,7 @@ This core file is enough for any single-command task. Load the relevant skill ** - **`mbql`** — authoring or fixing any MBQL query body: `mb query`, a card `dataset_query`, a transform `source.query`, a measure/segment `definition`, "aggregate and group by", reading `--dry-run` errors. The query-body reference. - **`viz`** — choosing a card's `display` and authoring `visualization_settings`: "make it a bar chart", "set the pie dimension/metric", "format this column as currency", "the card renders as a table instead of a chart". The presentation counterpart to `mbql`. - **`transform`** — "create a transform", "run a transform", authoring transform body JSON, run inspection. +- **`data-transformation`** — the higher-level workflow: turning a raw, normalized source database into a small set of clean, wide, analysis-ready tables for a non-technical user — "clean up", "flatten", "denormalize", "make sense of this database", "build analysis-ready tables". Wraps `transform` (the mechanics) with the investigate → propose → build flow. - **`semantic-layer`** — turning clean tables into reusable definitions: "make this filter reusable", "define active customers / net revenue / MRR officially", "create a segment / measure / metric", "so everyone uses the same definition". Builds on `mbql` (the definition bodies) and `transform` (widen a table first when a definition needs more than one). - **`git-sync`** — "import the latest changes", "export to git", "git sync", "dirty check", "stash before pulling". 
diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md new file mode 100644 index 0000000..38b38c3 --- /dev/null +++ b/skill-data/data-transformation/SKILL.md @@ -0,0 +1,147 @@ +--- +name: data-transformation +description: Turn a raw, normalized source database into a small set of clean, analysis-ready tables. Claude investigates the source, works out the real-world "things" the data is about (even when each one is scattered across several tables), decodes coded/JSON/translated values into readable text, and builds one wide, denormalized table per thing as Metabase transforms. Designed for a non-technical user who knows their domain. Use whenever someone wants to "clean up", "flatten", "denormalize", "make sense of", or "build analysis-ready tables from" a raw database. +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Data Transformation + +Your job: take a raw source database — usually normalized, often Fivetran-synced from some SaaS tool — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world _thing_ the data is about, built as Metabase **transforms** the user can inspect. + +Drive everything through the `mb` CLI. First load the skills you'll need: + +```bash +mb skills get core # auth, profiles, db/table/field inspection, query +mb skills get mbql # if you build transform queries in MBQL +mb skills get transform # creating/running transforms, run inspection +``` + +Authentication is the user's job. Check `mb auth list --json`; use the one profile if there's one, ask which if there are several, ask them to log in if there are none. Pass `--profile <name>` to every command. That profile's `url` is the instance's base URL — build every browser link below from it, so what you open always matches the instance the CLI is hitting.
+ +--- + +## Who you're talking to + +A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, whatever it is) but not databases. So: + +- **No modeling jargon.** Skip the warehouse vocabulary they won't know — grain, fact/dimension table, normalize, join, surrogate key, entity, materialize — and prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. +- Group what you show by **the question a column answers**, never by which source table it came from. +- Be a **helpful assistant, not an engineer reporting status.** Elide the machinery; ask the one sharp question that matters. + +--- + +## Two kinds of decisions + +Sort every choice into one of these. + +**Hard rules — absolutes, never ask:** + +1. Never flatten a multi-valued field into one opaque blob (e.g. three options jammed into `"email | phone | text"`). It destroys filterability, which is the whole point. +2. Never use modeling jargon with the user — keep to the level set in "Who you're talking to" (basic table terms and Metabase product terms are fine). +3. Always surface **real data you're about to leave out** — proactively, ranked by how much is actually there. +4. Never guess what a column or code means from its name. Confirm against the actual values. +5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. +6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them.
The label is for reading; the id is what lets two tables be combined later. You're building several tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible, and the user can't see it happened. (Same bargain as rule 1: that one preserves _filtering_, this preserves _combining_. Keep the ids; just don't make the user stare at them.) + +**Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: + +- **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable — a small companion table or a structured column, never opaque text. Structure is the user's call. Lean: whatever keeps filtering simplest. +- **Layering**: default **flat** — one self-contained table per thing, no behind-the-scenes intermediate tables. Suggest a shared cleaned-up base table only if the same cleaning would otherwise be copied across many tables — and even then, ask. +- **Out-of-scope things**: surface every kind-of-thing you find and ask in/out, rather than inferring scope from what they happened to mention. +- **A repeating thing vs. the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with the _repeating_ events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. (Phase 0's one-to-one / one-to-many check already tells you which.) + +Phrase a prudential call as a lean plus a nod: + +> "I'd keep these as one simple table rather than splitting into behind-the-scenes pieces — easier to look through. Good?" 
+ +--- + +## The process + +### Phase 0 — Investigate (quietly) + +Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. + +**Get oriented first.** As soon as you know which database and schema you're in: + +- **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=<database-id>&schema=<schema>`. Open it in their browser if you can (e.g. the `open` / `xdg-open` command); if you can't, just paste the URL. +- **Ask for a head start.** "Do you have a picture or file showing how your data fits together?" If yes, read it — it shortcuts the next steps. +- **Ask for their conventions.** "Is there already cleaned-up data, or a past project, that shows how your team likes this done?" If yes, inspect it: it tells you their naming, their idea of "clean," and existing tables worth linking to. + +Then dig in: + +1. **Map the tables.** List them; pull each one's column names and types; note its own id. +2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `c_4471` is meaningless until you join the lookup and find it's _"Preferred contact method"_. Build a code → label map before showing the user anything. +3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. For each `<thing>_id`, guess it points at `<thing>`, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to _link_ to it, not duplicate it. +4. 
**Pin down "one row per what."** Count rows; check the id is unique; figure out what a single row is. **Watch for lies:** a stale count column, or a table that looks like "all of X" but is a filtered subset. +5. **Reconcile across related tables.** Do child rows all link to a parent? Orphans? Is one table a trimmed snapshot while another keeps everything? These mismatches matter and the user can't see them — you must. +6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields. +7. **Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one _customer_ across a main table + a loyalty table + custom-profile columns). Decide "one row per \_\_\_" for each and gather its attributes, decoded. Watch for a table that secretly mixes _two_ things — a stable thing plus its repeating events; that's the split in the prudential calls above. + +**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns by any attribute. **This is not goal-setting and you don't show it to the user or build any of it.** Its only purpose is to pressure-test your table design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or a finer grain _cheaply_ preserves that flexibility, keep it. The clean data must serve the user's stated concern — but a good engineer doesn't scope so tightly that the next question means starting over. + +### Phase 1 — Present what you found (plain language) + +Three things, in order: + +**(a) The things, in plain terms.** One short blurb each. E.g. in an online store: + +> **Customers** — one row per customer. 
Who they are (name, company, location), how they've been in touch, what they've spent, whether they're active or churned. + +**(b) The full inventory — including what you'd leave out.** Never infer scope silently: + +> I found 6 kinds of things: **Customers, Orders, Products, Suppliers, Shipments, Returns.** I'd build the first four. **Shipments** and **Returns** also have real data — want those in, or leave them? + +**(c) What would be set aside — proactively, ranked, two buckets:** + +> Nothing important is lost. A few things set aside: +> • **Real data** — gift-message text (6 of 10 orders), delivery instructions (most), preferred carrier. Minor, but real — want any kept? +> • **Safe to drop** — duplicate product names in other languages, internal bookkeeping columns. No real loss. + +If you spotted existing clean data to link to (step 3), raise it here too — and **always run a suspected match past the user before wiring it; never graft onto their existing data silently.** Then ask your prudential questions, one at a time, each a lean-plus-nod. + +### Phase 2 — Iterate + +Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. Re-confirm the final picture in one short recap. + +### Phase 3 — Build, check, hand back + +Build one wide transform per agreed thing. Each table: + +- **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. +- **Decoded**: codes and JSON become readable text; deleted/internal rows and bookkeeping columns are gone. +- **Clean, plain column names**, consistent across tables. +- **Multi-valued pieces** in the agreed filterable structure — never opaque text. 
+- **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine _beside_ the rows, never _instead of_ them — a frozen total only ever answers the one question it was summed for. + +Then make the links real, not just implied: + +- **Wire foreign keys between your tables.** Mark each linking id as a foreign key pointing at the id it references (`mb field update` — set the column's type to foreign-key and its target). Now Metabase itself knows the tables connect and can traverse them. +- **Graft onto existing clean data** the user approved (step 3 / Phase 1): point the linking id at the existing table's id the same way. Link, don't duplicate. + +When you start refining a built transform _with_ the user, open its inspector for them so you're looking at the same thing — `/data-studio/transforms/<transform-id>/inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate. + +**Check the output before handing back — the user can't.** After each transform runs, look at the actual data and run quick ad-hoc tests against what Phase 0 led you to expect: row counts in the right ballpark, decoded columns actually readable (no stray codes), linking ids that resolve to the other tables, no column unexpectedly all-null or blown up in count. Treat surprises as bugs to chase, not noise. A table that can't combine with the others — usually a dropped id, or the same id named two different ways — is a silent failure; catch it here. + +Then report plainly: + +> Done. Three tables: +> • **Customers** — transform #41 +> • **Orders** — transform #42 +> • **Products** — transform #43 +> +> How they connect: each **Order** belongs to a **Customer**; each **Order** lists one or more **Products**. + +End on that connection map: it's what the user reads to trust the result, and what lets whatever they build next combine the tables correctly. 
+ +--- + +## A worked decode example (for your reference, not the user's) + +The shape recurs across SaaS exports, whatever the domain. A coded column — say `c_4471` on a responses table — means nothing alone. A lookup (`*_question`, `*_field`, `*_choice`) has a row where `attribute = 'c_4471'` and `name = "Preferred contact method"`. Single-select answers are often already `{"id":…, "value":"Email"}` — use `value`. Multi-select answers are arrays like `[{"value":"Email"},{"value":"SMS"}]` — the multi-valued case: keep each value filterable, don't concatenate. + +Always decode _before_ presenting, so the user sees "Preferred contact method", never `c_4471`. Three cautions: + +- **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come _from_ the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. +- **Codes are usually specific to today's data.** `c_4471` exists only for _this_ form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. +- **Normalize encodings once.** Turn raw representations clean in the table itself — signed amounts into clear positive numbers by kind, 0/1 into true/false — so nothing downstream re-derives them. 
diff --git a/tests/e2e/skills.e2e.test.ts b/tests/e2e/skills.e2e.test.ts index e5597dc..ef32545 100644 --- a/tests/e2e/skills.e2e.test.ts +++ b/tests/e2e/skills.e2e.test.ts @@ -9,6 +9,7 @@ import { cleanupConfigHome, mkTempConfigHome, runCli } from "./run-cli"; const BUNDLED_VISIBLE_NAMES = [ "core", + "data-transformation", "document", "git-sync", "mbql", @@ -30,7 +31,7 @@ describe("skills e2e", () => { return dir; } - it("list returns the seven bundled non-hidden skills, sorted by name", async () => { + it("list returns the eight bundled non-hidden skills, sorted by name", async () => { const result = await runCli({ args: ["skills", "list", "--json"], configHome: await makeIsolatedConfigHome(), From 580084df723c3987e6969e633bf3a9bceae1a213 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 12:16:54 -0600 Subject: [PATCH 03/31] Add robot-data-engineer parent router skill (working title) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the front-door router for the robot-data-scientist journey: a light wrapper that detects where the user is (raw data / clean tables / ready to chart), sets up auth + the autonomy slider once, then routes to the specialized child skill (data-transformation / semantic-layer / visualization) and hands off. Stays small by design — it dispatches, it doesn't do the work. Parent owns only the end-of-journey hard stop; children self-manage their in-stage gates. Name is a working title (robot-data-engineer), TBD before merge. 
- New skill-data/robot-data-engineer/SKILL.md (auto-discovered) - Cross-reference from the core skill's specialized-skills list - README bundled-skills table - e2e golden list (eight -> nine) --- README.md | 1 + skill-data/core/SKILL.md | 1 + skill-data/robot-data-engineer/SKILL.md | 79 +++++++++++++++++++++++++ tests/e2e/skills.e2e.test.ts | 3 +- 4 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 skill-data/robot-data-engineer/SKILL.md diff --git a/README.md b/README.md index 618196d..8ed76ac 100644 --- a/README.md +++ b/README.md @@ -1344,6 +1344,7 @@ Bundled skills: | `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | | `data-transformation` | Raw, normalized source database → clean, wide, analysis-ready tables for a non-technical user | | `semantic-layer` | Turning clean tables into reusable segments, measures, and metrics for a non-technical user | +| `robot-data-engineer` | Front-door router for the whole journey (raw → tables → definitions → dashboards); name TBD | | `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | | `git-sync` | Round-tripping Metabase content to/from a git remote | diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index 09428a6..e9101f6 100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -152,6 +152,7 @@ This core file is enough for any single-command task. Load the relevant skill ** - **`transform`** — "create a transform", "run a transform", authoring transform body JSON, run inspection. - **`data-transformation`** — the higher-level workflow: turning a raw, normalized source database into a small set of clean, wide, analysis-ready tables for a non-technical user — "clean up", "flatten", "denormalize", "make sense of this database", "build analysis-ready tables". Wraps `transform` (the mechanics) with the investigate → propose → build flow. 
- **`semantic-layer`** — turning clean tables into reusable definitions: "make this filter reusable", "define active customers / net revenue / MRR officially", "create a segment / measure / metric", "so everyone uses the same definition". Builds on `mbql` (the definition bodies) and `transform` (widen a table first when a definition needs more than one). +- **`robot-data-engineer`** — the front-door router for the whole journey (raw data → clean tables → reusable definitions → dashboards) for a non-technical user: "make sense of my data", "build a data model", "go from raw data to a dashboard", "be my data analyst". Detects where the user is and routes to `data-transformation` / `semantic-layer` / `visualization`. (Working title — name TBD.) - **`git-sync`** — "import the latest changes", "export to git", "git sync", "dirty check", "stash before pulling". If a task spans more than one, load each. Specialized skills assume the conventions above and won't repeat them. `mb skills list` enumerates everything on the installed version. diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md new file mode 100644 index 0000000..c79e3f2 --- /dev/null +++ b/skill-data/robot-data-engineer/SKILL.md @@ -0,0 +1,79 @@ +--- +name: robot-data-engineer +description: The front door for turning a database into something a non-technical person can actually use — clean tables, reusable definitions, and dashboards — all through the `mb` CLI. This skill is a light router: it works out where the user is (raw data? clean tables already? ready to chart?), sets up auth and how hands-on they want to be, then loads the right specialized skill to do the work. Load when someone wants to "make sense of my data", "build a data model", "go from raw data to a dashboard", "be my data analyst / data engineer", "set up analytics for X", or otherwise asks for the whole journey rather than one specific step. (Working title — name TBD before merge.) 
+allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Robot Data Engineer + +You're the **front door**, not the worker. Your job is to point the user at the right tool and get out of the way. The actual work lives in three specialized skills; you figure out which one the user needs right now, set up the shared context once, and hand off. Keep yourself small — the moment you know which skill to load, load it and let it drive. + +The journey, end to end, is three stages: + +1. **Raw data → clean tables** — the `data-transformation` skill. Takes a messy, normalized source database and builds a small set of wide, clean, analysis-ready tables. +2. **Clean tables → reusable definitions** — the `semantic-layer` skill. Turns those tables into segments (saved filters), measures (saved calculations), and metrics (official numbers) the whole team reuses. +3. **Tables/definitions → charts and dashboards** — the `visualization` skill. Builds the questions and dashboards people actually look at. + +Most users don't say which stage they want — they describe a goal. Your job is to map the goal to a stage, confirm you've got it right, and route. + +--- + +## Setup — do this once, up front + +Before routing, settle two things so the child skills don't have to re-ask: + +1. **Auth.** Check `mb auth list --json`. One profile → use it. Several → ask which. None → ask the user to log in (`mb auth login`), then proceed. Carry the chosen `--profile <name>` into everything. + +2. **How hands-on they want to be** (the autonomy slider). Ask once, plainly, and remember it for the whole session — tell the child skill which mode the user picked so they aren't asked again: + + > Quick thing — how hands-on do you want to be? + > • **Check with me on everything** — I'll run each step past you first. + > • **Balanced** (default) — I'll decide the obvious stuff and ask only when it matters. + > • **Just go** — I'll do what makes sense and show you the result. 
+ +Two things you always own, regardless of mode and regardless of which child ran: + +- **When genuinely unsure, ask — never assume.** Pass this expectation down. +- **The final hard stop.** Before the user treats anything as done, give a plain-language recap of what now exists and hand them something to open and eyeball. The child skills stop within their own stage; you stop at the end of the journey. + +--- + +## Work out where they are, then route + +Don't make the user name a stage. Peek at the instance and read their goal, then meet them where they are. + +**Detect the starting state** (cheap — don't pull whole-warehouse rollups): + +- List databases/schemas (`mb db …`, `mb table list`). Are there raw, normalized, SaaS-synced-looking tables (lots of tables, coded columns, `*_field`/`*_choice` lookups)? Or are there already wide, clean, human-readable tables? +- Are there already segments/measures/metrics (`mb segment list`, `mb measure list`, `mb card list`)? Existing dashboards (`mb dashboard list`)? + +**Map goal + state to a skill:** + +| What the user wants / what's there | Load | +| -------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| "Clean up / flatten / make sense of" raw, normalized data; no clean tables yet | `data-transformation` | +| Clean tables exist; "make this reusable", "define active customers / revenue / MRR officially", "so everyone uses the same definition" | `semantic-layer` | +| Tables (and maybe definitions) exist; "chart this", "build a dashboard", "show me X over time" | `visualization` | +| "Do the whole thing" / "set up analytics for X" from raw data | start at `data-transformation`, then continue down the journey (see below) | + +Load a skill with `mb skills get <name>`. Then **hand off** — the child owns its own flow, asking and stopping within its stage. 
Don't narrate the child's work or duplicate its steps. + +**If the state and the goal disagree** — they ask for a dashboard but there are only raw tables — say so plainly and offer the earlier stage first: _"There aren't clean tables to chart yet — want me to build those first, then we'll chart them?"_ Don't silently build on raw data. + +--- + +## The whole journey + +When the user wants the full arc (raw → dashboard), run the three stages in order, handing off to each child in turn. Between stages, let the child's own stopping point double as a check-in: clean tables exist and look right → move to definitions → move to charts. You don't need a heavy gate between every stage (the children handle their own), but do confirm the user's happy before starting the next one in **Check with me on everything** mode, and always finish with your end-of-journey recap. + +A user can also drop in at any stage — that's the whole point of detecting state. Someone who already has clean tables and just wants metrics gets routed straight to `semantic-layer`; don't drag them back through cleaning. + +--- + +## Don't + +- **Don't do the children's work yourself.** If you're writing transform SQL or segment definitions in this skill, you've gone too deep — load the child and let it work. +- **Don't re-ask the autonomy question** once it's set; pass it down. +- **Don't skip the starting-state check** and assume raw data — a user with clean tables shouldn't be sent through cleaning. +- **Don't build on raw data when the goal needs clean tables** — route to the earlier stage first. +- **Don't drop the final recap** — you own the end-of-journey hard stop even though each child stops within its own stage. 
diff --git a/tests/e2e/skills.e2e.test.ts b/tests/e2e/skills.e2e.test.ts index ef32545..a6b8c47 100644 --- a/tests/e2e/skills.e2e.test.ts +++ b/tests/e2e/skills.e2e.test.ts @@ -13,6 +13,7 @@ const BUNDLED_VISIBLE_NAMES = [ "document", "git-sync", "mbql", + "robot-data-engineer", "semantic-layer", "transform", "visualization", @@ -31,7 +32,7 @@ describe("skills e2e", () => { return dir; } - it("list returns the eight bundled non-hidden skills, sorted by name", async () => { + it("list returns the nine bundled non-hidden skills, sorted by name", async () => { const result = await runCli({ args: ["skills", "list", "--json"], configHome: await makeIsolatedConfigHome(), From fabd4eb8a039dc5bb793222fdcde944e0d435457 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 12:18:08 -0600 Subject: [PATCH 04/31] Update data-transformation skill from upstream gist Sync Timothy's latest revision: two new hard rules (confirm non-obvious business rules in plain terms before baking them in; flag sensitive personal data rather than silently carrying it), a sensitive-data prudential call, and expanded guidance on decoding, soft-delete filtering, writing table/column descriptions back to Metabase, and one-pass encoding normalization. Co-authored-by: Timothy Dean <7650347+galdre@users.noreply.github.com> --- skill-data/data-transformation/SKILL.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 38b38c3..3c6286b 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -42,6 +42,8 @@ Sort every choice into one of these. 4. Never guess what a column or code means from its name. Confirm against the actual values. 5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. 6. Never drop the columns that link things together. 
Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id is what lets two tables be combined later. You're building several tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible, and the user can't see it happened. (Same bargain as rule 1: that one preserves _filtering_, this preserves _combining_. Keep the ids; just don't make the user stare at them.) +7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back _out_), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") +8. Never quietly carry sensitive personal data through. Flag it when you find it — addresses, phone numbers, emails, IPs, payment/financial fields — and let the user decide how to handle it (the prudential call below). Default to surfacing it, never to silently exposing it in a table others will browse. **Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: @@ -49,6 +51,7 @@ Sort every choice into one of these. - **Layering**: default **flat** — one self-contained table per thing, no behind-the-scenes intermediate tables. Suggest a shared cleaned-up base table only if the same cleaning would otherwise be copied across many tables — and even then, ask. - **Out-of-scope things**: surface every kind-of-thing you find and ask in/out, rather than inferring scope from what they happened to mention. 
- **A repeating thing vs. the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with the _repeating_ events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. (Phase 0's one-to-one / one-to-many check already tells you which.) +- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), _how_ to carry it is the user's call — keep as-is, mask (last-4, domain-only, city not street), or drop. Lean: keep what the stated work needs, mask the rest, drop what nothing needs. Phrase a prudential call as a lean plus a nod: @@ -71,7 +74,7 @@ Don't narrate this — a single "Let me take a look at what's in here — one mi Then dig in: 1. **Map the tables.** List them; pull each one's column names and types; note its own id. -2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `c_4471` is meaningless until you join the lookup and find it's _"Preferred contact method"_. Build a code → label map before showing the user anything. +2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `c_4471` is meaningless until you join the lookup and find it's _"Preferred contact method"_. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. 3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. 
For each `_id`, guess it points at ``, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to _link_ to it, not duplicate it. 4. **Pin down "one row per what."** Count rows; check the id is unique; figure out what a single row is. **Watch for lies:** a stale count column, or a table that looks like "all of X" but is a filtered subset. 5. **Reconcile across related tables.** Do child rows all link to a parent? Orphans? Is one table a trimmed snapshot while another keeps everything? These mismatches matter and the user can't see them — you must. @@ -109,7 +112,7 @@ Cheap, because nothing's built. Adjust the set of things, what's kept, and the s Build one wide transform per agreed thing. Each table: - **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. -- **Decoded**: codes and JSON become readable text; deleted/internal rows and bookkeeping columns are gone. +- **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's delete flag — e.g. `_fivetran_deleted` — so tombstones never reach clean data). - **Clean, plain column names**, consistent across tables. - **Multi-valued pieces** in the agreed filterable structure — never opaque text. - **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. 
A convenience count is fine _beside_ the rows, never _instead of_ them — a frozen total only ever answers the one question it was summed for. @@ -118,6 +121,7 @@ Then make the links real, not just implied: - **Wire foreign keys between your tables.** Mark each linking id as a foreign key pointing at the id it references (`mb field update` — set the column's type to foreign-key and its target). Now Metabase itself knows the tables connect and can traverse them. - **Graft onto existing clean data** the user approved (step 3 / Phase 1): point the linking id at the existing table's id the same way. Link, don't duplicate. +- **Write down what you learned.** You decoded every column's real meaning while investigating — save it: set a short description on each table and its non-obvious columns (`mb table update` / `mb field update`). The cleaned data then explains itself inside Metabase — in search, in the Question editor, to Metabot — instead of the knowledge living only in this chat. When you start refining a built transform _with_ the user, open its inspector for them so you're looking at the same thing — `/data-studio/transforms//inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate. @@ -144,4 +148,4 @@ Always decode _before_ presenting, so the user sees "Preferred contact method", - **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come _from_ the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. - **Codes are usually specific to today's data.** `c_4471` exists only for _this_ form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. 
When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. -- **Normalize encodings once.** Turn raw representations clean in the table itself — signed amounts into clear positive numbers by kind, 0/1 into true/false — so nothing downstream re-derives them. +- **Normalize encodings once.** Turn raw representations clean in the table itself, so nothing downstream re-derives them: signed amounts → clear positive numbers by kind, 0/1 → true/false, timestamps → one consistent timezone, text → trimmed and case-consistent, and junk placeholders (`"NULL"`, `"N/A"`, `"-"`, empty string) → real null. From 748e062c7cf8f69458b80e5d9dfad7112205422b Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 12:34:26 -0600 Subject: [PATCH 05/31] Update data-transformation skill from upstream gist Sync Timothy's latest revision: a new hard rule against overwriting an existing table or another transform's output (check the target name is free first), table-name agreement in the iterate phase (propose + confirm free before building), and a new cleaning checklist section whose governing rule is surface-what-you-find rather than silently fix it. Co-authored-by: Timothy Dean <7650347+galdre@users.noreply.github.com> --- skill-data/data-transformation/SKILL.md | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 3c6286b..a5d2d59 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -44,6 +44,7 @@ Sort every choice into one of these. 6. Never drop the columns that link things together. 
Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id is what lets two tables be combined later. You're building several tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible, and the user can't see it happened. (Same bargain as rule 1: that one preserves _filtering_, this preserves _combining_. Keep the ids; just don't make the user stare at them.) 7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back _out_), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") 8. Never quietly carry sensitive personal data through. Flag it when you find it — addresses, phone numbers, emails, IPs, payment/financial fields — and let the user decide how to handle it (the prudential call below). Default to surfacing it, never to silently exposing it in a table others will browse. +9. Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. Reusing a name is only ever for updating _your own_ transform (`transform update`), never for clobbering another. **Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: @@ -78,7 +79,7 @@ Then dig in: 3. 
**Prove the connections — don't trust declared keys.** Synced databases usually have none. For each `_id`, guess it points at ``, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to _link_ to it, not duplicate it. 4. **Pin down "one row per what."** Count rows; check the id is unique; figure out what a single row is. **Watch for lies:** a stale count column, or a table that looks like "all of X" but is a filtered subset. 5. **Reconcile across related tables.** Do child rows all link to a parent? Orphans? Is one table a trimmed snapshot while another keeps everything? These mismatches matter and the user can't see them — you must. -6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields. +6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields. Profile with the cleaning checklist (end of file) in mind — surface the quality smells you hit, don't silently fix them. 7. **Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one _customer_ across a main table + a loyalty table + custom-profile columns). Decide "one row per \_\_\_" for each and gather its attributes, decoded. Watch for a table that secretly mixes _two_ things — a stable thing plus its repeating events; that's the split in the prudential calls above. 
**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns by any attribute. **This is not goal-setting and you don't show it to the user or build any of it.** Its only purpose is to pressure-test your table design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or a finer grain _cheaply_ preserves that flexibility, keep it. The clean data must serve the user's stated concern — but a good engineer doesn't scope so tightly that the next question means starting over. @@ -105,7 +106,7 @@ If you spotted existing clean data to link to (step 3), raise it here too — an ### Phase 2 — Iterate -Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. Re-confirm the final picture in one short recap. +Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. **Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. ### Phase 3 — Build, check, hand back @@ -149,3 +150,21 @@ Always decode _before_ presenting, so the user sees "Preferred contact method", - **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come _from_ the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. 
- **Codes are usually specific to today's data.** `c_4471` exists only for _this_ form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. - **Normalize encodings once.** Turn raw representations clean in the table itself, so nothing downstream re-derives them: signed amounts → clear positive numbers by kind, 0/1 → true/false, timestamps → one consistent timezone, text → trimmed and case-consistent, and junk placeholders (`"NULL"`, `"N/A"`, `"-"`, empty string) → real null. + +--- + +## Cleaning checklist (for your reference, not the user's) + +A scan-list, not a pipeline — and the governing rule is **surface what you find, don't silently "fix" it.** Silently dropping outliers, imputing blanks, or merging "duplicates" can erase the exact signal the domain expert cares about. Safe standardizations you just apply; everything else is a prudential call — flag it with a lean and let them decide. + +**Just apply** (safe, universal — already your default): consistent timestamps/timezone; trimmed, case-consistent text; junk placeholders (`"NULL"`, `"N/A"`, `"-"`, `""`) → real null; sane numeric precision; booleans from varied forms (Y/N, 1/0); soft-deleted rows filtered, bookkeeping columns dropped. + +**Notice and surface** (the answer depends on their business): + +- **Duplicates** — exact, or by business rule ("same email = same person"). Never merge silently. +- **Validation smells** — out-of-range numbers, malformed emails/phones/ids, `end_date < start_date`. +- **Outliers** — values that read as data-entry errors. Flag, don't drop. +- **Missing data** — random vs. systematic? Surface the pattern; never silently impute or default. 
+- **Free text / mixed encodings** — handle the safe parts, flag the rest. + +Already covered by the rules above, listed so they stay on your radar: structural reshaping (decode/JSON/multi-value), orphans & key validity (Phase 0 step 5 + the post-run check), and recording meanings (the descriptions step). From 8cb6ef9df3aa10880ffe000c711e023317d09d4e Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Mon, 1 Jun 2026 15:07:45 -0600 Subject: [PATCH 06/31] Tweaks to data-transformation skill --- skill-data/data-transformation/SKILL.md | 69 +++++++++++-------------- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index a5d2d59..f37f754 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -6,7 +6,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Data Transformation -Your job: take a raw source database — usually normalized, often Fivetran-synced from some SaaS tool — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world _thing_ the data is about, built as Metabase **transforms** the user can inspect. +Your job: take a raw source database — usually normalized, often Fivetran-synced from some SaaS tool — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world *thing* the data is about, built as Metabase **transforms** the user can inspect. Drive everything through the `mb` CLI. First load the skills you'll need: @@ -18,15 +18,19 @@ mb skills get transform # creating/running transforms, run inspection Authentication is the user's job. Check `mb auth list --json`; use the one profile if there's one, ask which if there are several, ask them to log in if there are none. Pass `--profile ` to every command. That profile's `url` is the instance's base URL — build every browser link below from it, so what you open always matches the instance the CLI is hitting. 
+If you are making transforms, use the transform skill. + --- ## Who you're talking to A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, whatever it is) but not databases. So: -- **No modeling jargon.** Skip the warehouse vocabulary they won't know — grain, fact/dimension table, normalize, join, surrogate key, entity, materialize — and prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. +- **No modeling jargon.** Skip the warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — and prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. +- Don't expect the user to understand raw SQL. - Group what you show by **the question a column answers**, never by which source table it came from. - Be a **helpful assistant, not an engineer reporting status.** Elide the machinery; ask the one sharp question that matters. +- Your user probably says "go" and comes back later. 
**If you ever ask the user a question, wait for their answer.** --- @@ -35,101 +39,90 @@ A **non-technical user who knows their domain well** — they understand the bus Sort every choice into one of these. **Hard rules — absolutes, never ask:** - 1. Never flatten a multi-valued field into one opaque blob (e.g. three options jammed into `"email | phone | text"`). It destroys filterability, which is the whole point. 2. Never use jargon with the user. 3. Always surface **real data you're about to leave out** — proactively, ranked by how much is actually there. 4. Never guess what a column or code means from its name. Confirm against the actual values. -5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. -6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id is what lets two tables be combined later. You're building several tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible, and the user can't see it happened. (Same bargain as rule 1: that one preserves _filtering_, this preserves _combining_. Keep the ids; just don't make the user stare at them.) -7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back _out_), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") +5. Never silently drop a whole *thing*. 
Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. +6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id is what lets two tables be combined later. You're building several tables about *related* things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible, and the user can't see it happened. (Same bargain as rule 1: that one preserves *filtering*, this preserves *combining*. Keep the ids; just don't make the user stare at them.) +7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back *out*), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") 8. Never quietly carry sensitive personal data through. Flag it when you find it — addresses, phone numbers, emails, IPs, payment/financial fields — and let the user decide how to handle it (the prudential call below). Default to surfacing it, never to silently exposing it in a table others will browse. -9. Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. Reusing a name is only ever for updating _your own_ transform (`transform update`), never for clobbering another. +9. 
Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. Reusing a name is only ever for updating *your own* transform (`transform update`), never for clobbering another. **Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: - -- **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable — a small companion table or a structured column, never opaque text. Structure is the user's call. Lean: whatever keeps filtering simplest. +- **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable — a structured column for predefined lists, or a simple join table, never opaque text. Structure is the user's call. Lean: whatever keeps filtering simplest, very possibly flat. - **Layering**: default **flat** — one self-contained table per thing, no behind-the-scenes intermediate tables. Suggest a shared cleaned-up base table only if the same cleaning would otherwise be copied across many tables — and even then, ask. - **Out-of-scope things**: surface every kind-of-thing you find and ask in/out, rather than inferring scope from what they happened to mention. -- **A repeating thing vs. the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with the _repeating_ events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. 
(Phase 0's one-to-one / one-to-many check already tells you which.) -- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), _how_ to carry it is the user's call — keep as-is, mask (last-4, domain-only, city not street), or drop. Lean: keep what the stated work needs, mask the rest, drop what nothing needs. +- **A repeating thing vs. the events it takes part in**: one table can mix a *stable* thing (a customer, a company) with the *repeating* events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. (Phase 1's one-to-one / one-to-many check already tells you which.) +- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), *how* to carry it is the user's call — keep as-is, mask (last-4, domain-only, city not street), or drop. Lean: keep what the stated work needs, mask the rest, drop what nothing needs. Phrase a prudential call as a lean plus a nod: - > "I'd keep these as one simple table rather than splitting into behind-the-scenes pieces — easier to look through. Good?" --- ## The process -### Phase 0 — Investigate (quietly) - -Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. +### Phase 0 — Get Oriented **Get oriented first.** As soon as you know which database and schema you're in: - -- **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=&schema=`.
Open it in their browser if you can (e.g. the `open` / `xdg-open` command); if you can't, just paste the URL. -- **Ask for a head start.** "Do you have a picture or file showing how your data fits together?" If yes, read it — it shortcuts the next steps. +- **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=&schema=`. Open it in their browser if you can (e.g. the `open` / `xdg-open` command); if you can't, just paste the URL. DO NOT SKIP THIS STEP. +- **Ask for a head start.** "Do you have a picture or file showing how your data fits together, like an ERD?" If yes, read it — it shortcuts the next steps. - **Ask for their conventions.** "Is there already cleaned-up data, or a past project, that shows how your team likes this done?" If yes, inspect it: it tells you their naming, their idea of "clean," and existing tables worth linking to. -Then dig in: +### Phase 1 — Investigate (quietly) +Then dig in. Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. 1. **Map the tables.** List them; pull each one's column names and types; note its own id. -2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `c_4471` is meaningless until you join the lookup and find it's _"Preferred contact method"_. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. -3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. For each `_id`, guess it points at ``, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. 
Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to _link_ to it, not duplicate it. +2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `doodad_4471` is meaningless until you join the lookup and find it's *"Preferred vehicular transport"*. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. +3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. If that's the case, ask the user whether they have an ERD or other relationship information (screenshot, JSON, documentation, etc.). For each `_id`, guess it points at ``, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to *link* to it, not duplicate it. 4. **Pin down "one row per what."** Count rows; check the id is unique; figure out what a single row is. **Watch for lies:** a stale count column, or a table that looks like "all of X" but is a filtered subset. 5. **Reconcile across related tables.** Do child rows all link to a parent? Orphans? Is one table a trimmed snapshot while another keeps everything? These mismatches matter and the user can't see them — you must. 6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields.
Profile with the cleaning checklist (end of file) in mind — surface the quality smells you hit, don't silently fix them. -7. **Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one _customer_ across a main table + a loyalty table + custom-profile columns). Decide "one row per \_\_\_" for each and gather its attributes, decoded. Watch for a table that secretly mixes _two_ things — a stable thing plus its repeating events; that's the split in the prudential calls above. +7. **Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one *customer* across a main table + a loyalty table + custom-profile columns). Decide "one row per ___" for each and gather its attributes, decoded. Watch for a table that secretly mixes *two* things — a stable thing plus its repeating events; that's the split in the prudential calls above. -**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns by any attribute. **This is not goal-setting and you don't show it to the user or build any of it.** Its only purpose is to pressure-test your table design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or a finer grain _cheaply_ preserves that flexibility, keep it. The clean data must serve the user's stated concern — but a good engineer doesn't scope so tightly that the next question means starting over. +**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns by any attribute. 
**This is not goal-setting and you don't show it to the user or build any of it.** Its only purpose is to pressure-test your table design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or a finer grain *cheaply* preserves that flexibility, keep it. The clean data must serve the user's stated concern — but a good engineer doesn't scope so tightly that the next question means starting over. -### Phase 1 — Present what you found (plain language) +### Phase 2 — Present what you found (plain language) Three things, in order: **(a) The things, in plain terms.** One short blurb each. E.g. in an online store: - > **Customers** — one row per customer. Who they are (name, company, location), how they've been in touch, what they've spent, whether they're active or churned. **(b) The full inventory — including what you'd leave out.** Never infer scope silently: - > I found 6 kinds of things: **Customers, Orders, Products, Suppliers, Shipments, Returns.** I'd build the first four. **Shipments** and **Returns** also have real data — want those in, or leave them? **(c) What would be set aside — proactively, ranked, two buckets:** - > Nothing important is lost. A few things set aside: > • **Real data** — gift-message text (6 of 10 orders), delivery instructions (most), preferred carrier. Minor, but real — want any kept? > • **Safe to drop** — duplicate product names in other languages, internal bookkeeping columns. No real loss. If you spotted existing clean data to link to (step 3), raise it here too — and **always run a suspected match past the user before wiring it; never graft onto their existing data silently.** Then ask your prudential questions, one at a time, each a lean-plus-nod. -### Phase 2 — Iterate +### Phase 3 — Iterate Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. 
**Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. -### Phase 3 — Build, check, hand back +### Phase 4 — Build, check, hand back Build one wide transform per agreed thing. Each table: - - **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. - **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's delete flag — e.g. `_fivetran_deleted` — so tombstones never reach clean data). - **Clean, plain column names**, consistent across tables. - **Multi-valued pieces** in the agreed filterable structure — never opaque text. -- **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine _beside_ the rows, never _instead of_ them — a frozen total only ever answers the one question it was summed for. +- **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine *beside* the rows, never *instead of* them — a frozen total only ever answers the one question it was summed for. 
Then make the links real, not just implied: - - **Wire foreign keys between your tables.** Mark each linking id as a foreign key pointing at the id it references (`mb field update` — set the column's type to foreign-key and its target). Now Metabase itself knows the tables connect and can traverse them. - **Graft onto existing clean data** the user approved (step 3 / Phase 1): point the linking id at the existing table's id the same way. Link, don't duplicate. - **Write down what you learned.** You decoded every column's real meaning while investigating — save it: set a short description on each table and its non-obvious columns (`mb table update` / `mb field update`). The cleaned data then explains itself inside Metabase — in search, in the Question editor, to Metabot — instead of the knowledge living only in this chat. -When you start refining a built transform _with_ the user, open its inspector for them so you're looking at the same thing — `/data-studio/transforms//inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate. +When you start refining a built transform *with* the user, open its inspector for them so you're looking at the same thing — `/data-studio/transforms//inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate. **Check the output before handing back — the user can't.** After each transform runs, look at the actual data and run quick ad-hoc tests against what Phase 0 led you to expect: row counts in the right ballpark, decoded columns actually readable (no stray codes), linking ids that resolve to the other tables, no column unexpectedly all-null or blown up in count. Treat surprises as bugs to chase, not noise. A table that can't combine with the others — usually a dropped id, or the same id named two different ways — is a silent failure; catch it here. Then report plainly: - > Done. 
Three tables: > • **Customers** — transform #41 > • **Orders** — transform #42 @@ -145,10 +138,9 @@ End on that connection map: it's what the user reads to trust the result, and wh The shape recurs across SaaS exports, whatever the domain. A coded column — say `c_4471` on a responses table — means nothing alone. A lookup (`*_question`, `*_field`, `*_choice`) has a row where `attribute = 'c_4471'` and `name = "Preferred contact method"`. Single-select answers are often already `{"id":…, "value":"Email"}` — use `value`. Multi-select answers are arrays like `[{"value":"Email"},{"value":"SMS"}]` — the multi-valued case: keep each value filterable, don't concatenate. -Always decode _before_ presenting, so the user sees "Preferred contact method", never `c_4471`. Three cautions: - -- **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come _from_ the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. -- **Codes are usually specific to today's data.** `c_4471` exists only for _this_ form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. +Always decode *before* presenting, so the user sees "Preferred contact method", never `c_4471`. Three cautions: +- **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come *from* the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. 
+- **Codes are usually specific to today's data.** `c_4471` exists only for *this* form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. - **Normalize encodings once.** Turn raw representations clean in the table itself, so nothing downstream re-derives them: signed amounts → clear positive numbers by kind, 0/1 → true/false, timestamps → one consistent timezone, text → trimmed and case-consistent, and junk placeholders (`"NULL"`, `"N/A"`, `"-"`, empty string) → real null. --- @@ -160,7 +152,6 @@ A scan-list, not a pipeline — and the governing rule is **surface what you fin **Just apply** (safe, universal — already your default): consistent timestamps/timezone; trimmed, case-consistent text; junk placeholders (`"NULL"`, `"N/A"`, `"-"`, `""`) → real null; sane numeric precision; booleans from varied forms (Y/N, 1/0); soft-deleted rows filtered, bookkeeping columns dropped. **Notice and surface** (the answer depends on their business): - - **Duplicates** — exact, or by business rule ("same email = same person"). Never merge silently. - **Validation smells** — out-of-range numbers, malformed emails/phones/ids, `end_date < start_date`. - **Outliers** — values that read as data-entry errors. Flag, don't drop. 
From ae1d025c23fc1bfbe05dc4c26ff6d396adab4634 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 15:12:14 -0600 Subject: [PATCH 07/31] Disambiguate skill altitude in descriptions Add a strategy-vs-mechanics carve-out to the trigger clause of the two strategy skills so the model picks the right altitude: - data-transformation: points single-transform work at the transform skill - semantic-layer: points raw segment/measure command mechanics at core Mirror transform's existing downward ref to core with an upward breadcrumb to data-transformation in its body. --- skill-data/data-transformation/SKILL.md | 2 +- skill-data/semantic-layer/SKILL.md | 2 +- skill-data/transform/SKILL.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index f37f754..1f953bf 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -1,6 +1,6 @@ --- name: data-transformation -description: Turn a raw, normalized source database into a small set of clean, analysis-ready tables. Claude investigates the source, works out the real-world "things" the data is about (even when each one is scattered across several tables), decodes coded/JSON/translated values into readable text, and builds one wide, denormalized table per thing as Metabase transforms. Designed for a non-technical user who knows their domain. Use whenever someone wants to "clean up", "flatten", "denormalize", "make sense of", or "build analysis-ready tables from" a raw database. +description: Turn a raw, normalized source database into a small set of clean, analysis-ready tables. Claude investigates the source, works out the real-world "things" the data is about (even when each one is scattered across several tables), decodes coded/JSON/translated values into readable text, and builds one wide, denormalized table per thing as Metabase transforms. 
Designed for a non-technical user who knows their domain. Use whenever someone wants to "clean up", "flatten", "denormalize", "make sense of", or "build analysis-ready tables from" a raw database. This is the strategy skill for modeling a whole database into a set of clean tables; for authoring or running one individual transform (body shape, flags, run inspection), use the `transform` skill instead. allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- diff --git a/skill-data/semantic-layer/SKILL.md b/skill-data/semantic-layer/SKILL.md index bd9864e..5688784 100644 --- a/skill-data/semantic-layer/SKILL.md +++ b/skill-data/semantic-layer/SKILL.md @@ -1,6 +1,6 @@ --- name: semantic-layer -description: Turn clean, analysis-ready tables into a shared vocabulary everyone reuses — Metabase segments (saved filters like "active customers"), measures (saved calculations like "net revenue"), and metrics (official numbers like "monthly recurring revenue") — so people stop reinventing the same definitions five different ways. Find the questions people keep asking, propose segments and measures in plain language (teaching the Metabase terms as you go), graft them onto what the org already tracks, and build them via `mb segment create` / `mb measure create` / `mb card create`. For a non-technical user who knows their domain. Load when someone wants to "make this reusable", "define X officially", "standardize how we calculate Y", "so everyone uses the same definition", "save this filter/calculation/metric for the team", or "create a segment / measure / metric". +description: Turn clean, analysis-ready tables into a shared vocabulary everyone reuses — Metabase segments (saved filters like "active customers"), measures (saved calculations like "net revenue"), and metrics (official numbers like "monthly recurring revenue") — so people stop reinventing the same definitions five different ways. 
Find the questions people keep asking, propose segments and measures in plain language (teaching the Metabase terms as you go), graft them onto what the org already tracks, and build them via `mb segment create` / `mb measure create` / `mb card create`. For a non-technical user who knows their domain. Load when someone wants to "make this reusable", "define X officially", "standardize how we calculate Y", "so everyone uses the same definition", "save this filter/calculation/metric for the team", or "create a segment / measure / metric". This is the strategy skill for deciding which reusable definitions an org needs and designing them; for the raw `mb segment` / `mb measure` command mechanics (flags, body shape), use the `core` skill instead. allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- diff --git a/skill-data/transform/SKILL.md b/skill-data/transform/SKILL.md index 270686d..4008359 100644 --- a/skill-data/transform/SKILL.md +++ b/skill-data/transform/SKILL.md @@ -8,7 +8,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion A **transform** persists the result of a query (native SQL or MBQL) to a warehouse table the user can read from cards, dashboards, and other transforms. It runs on a schedule (via `transform-job`) or on-demand (`transform run`). -This skill covers the create-and-run flow. The general flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). +This skill covers the create-and-run flow for one transform. The general flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). Deciding *which* transforms to build — modeling a whole raw database into a set of clean, analysis-ready tables — is the `data-transformation` skill (`mb skills get data-transformation`). 
## Body shape From 83d7834a11710e803b2255e31d7d9a293f8230a6 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Mon, 1 Jun 2026 15:36:15 -0600 Subject: [PATCH 08/31] Add data-analysis skill; route to it from robot-data-engineer New data-analysis sub-skill covers the fourth journey stage: answering real questions from clean tables and handing back a written report (distinct from charting, which stays in visualization). Wire it into the robot-data-engineer router's description, journey list, and route table. Also fixes a latent parse bug in the router frontmatter: an unquoted "light router: it works" made the YAML parser read the description as a mapping, so parseFrontmatter returned null and discoverSkills silently dropped the skill -- robot-data-engineer never appeared in `mb skills list`. Reworded the colon to an em-dash. --- skill-data/data-analysis/SKILL.md | 63 +++++++++++++++++++++++++ skill-data/robot-data-engineer/SKILL.md | 8 ++-- 2 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 skill-data/data-analysis/SKILL.md diff --git a/skill-data/data-analysis/SKILL.md b/skill-data/data-analysis/SKILL.md new file mode 100644 index 0000000..a47ceb5 --- /dev/null +++ b/skill-data/data-analysis/SKILL.md @@ -0,0 +1,63 @@ +--- +name: data-analysis +description: Answer real questions from clean, analysis-ready tables and hand back a plain-language report — not a chart-building task, an answer-finding one. Claude reads the available tables, turns the user's question ("who registered", "what did people say they want", "which option won") into queries, runs them against the live instance, sanity-checks the numbers, and writes up findings the user can read and trust. Works over tables that are already clean (wide, human-readable) — survey/registration answers, event signups, customer lists, anything where the user has a question and the data already holds the answer. 
Use whenever someone wants to "answer questions about my data", "report on who registered / signed up / responded", "what did people say", "analyze X", "explore this data", "find patterns", "summarize the responses", or "build me a report". For a non-technical user who knows their domain. This is the strategy skill for investigating clean data and reporting findings; if the question needs charts/dashboards instead of a written answer, use the `visualization` skill; if the tables are still raw and messy, use `data-transformation` first. +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Data Analysis + +The user has a question and clean data that already holds the answer. Your job: find the answer, check it's right, and hand it back in plain language. You're an analyst, not a dashboard builder — the deliverable is a **trustworthy written answer**, optionally backed by a saved question they can re-open. + +This skill assumes the tables are already clean (wide, human-readable). If they're raw and normalized — lots of `*_field`/`*_choice` lookups, coded columns, JSON blobs — stop and route to `data-transformation` first; don't analyze on top of a mess. + +--- + +## The loop + +For each question the user asks: + +1. **Find where the answer lives.** List tables (`mb table list`, `mb db schema-tables `). Read the columns (`mb table fields `). Clean datasets often ship the same facts two ways — a **wide** table (one row per thing, easy to read) and a **long** table (one row per attribute, easy to aggregate over many-valued answers). Pick the one that fits the question: per-person facts → wide; "which option was most popular" across a multi-select → long. + +2. **Turn the question into a query.** Write it, run it (`mb query`). Start small — a `count(*)` and a couple of sample rows to confirm you're pointed at the right table and the columns mean what you think. Then write the real query. + +3. 
**Sanity-check before you believe it.** A number with no cross-check is a guess. Confirm row counts against a total you trust, watch for nulls/blanks inflating or deflating a percentage, and re-read the column you grouped on — a `type/Category` column with "confirmed"/"cancelled" means your "how many registered" answer depends on which statuses you counted. State the denominator. + +4. **Report in plain language.** Lead with the answer, then how you got it. Numbers get context ("9 of 10 confirmed"), not bare figures. For free-text answers, quote a few real responses rather than only counting them — the words are the value. + +--- + +## What to ask the user up front + +Don't over-interrogate, but settle the things that change the answer: + +- **Scope.** All-time or a window? Everyone, or only confirmed/active? A "how many registered" with no status filter and a "how many *confirmed*" are different numbers — pick the one they mean, and say which you used. +- **Cut.** Do they want the headline number, or the number broken down (by role, by company, by version)? A breakdown is usually one `GROUP BY` away and far more useful. +- **Form of the answer.** A number in chat? A short written digest? A saved question they can re-open and refilter? If they want something durable or visual, that's the `visualization` skill — hand off. + +When genuinely unsure which interpretation they mean, ask — never silently pick one and present it as the answer. + +--- + +## Survey / registration data — the common shape + +A lot of "analyze who registered / what did people say" work lands on event or survey data, which has a recognizable shape worth calling out: + +- A **per-registrant wide table** — name, company, role, status, plus one column per single-answer question. Use it for "who registered", rosters, breakdowns by role/version/company, and any per-person filter. +- A **long answers table** — one row per (registrant, question, answer). 
Use it for **multi-select** questions (one person picks several options, so they can't flatten into one wide column) and for "which option was chosen most". Group by the question text, then by the answer value. +- **Question definitions** — the catalog of what was asked, the answer choices, free-text vs single vs multi. Read this first to know which questions exist and how each is typed before you start counting. + +Three report families cover most asks: + +1. **Roster** — who registered, with the facts that matter (company, role, status). A filtered, ordered read of the wide table. +2. **Distribution** — how the group splits on a single-select (role, version, customer-or-not). A `GROUP BY` with counts; the agent-facing answer is "X% picked A, Y% picked B". +3. **Open-ended digest** — what people said in free-text ("what do you want to learn / teach / discuss"). Small N usually — list the actual answers, don't just count them; the responses are the point. + +--- + +## Don't + +- **Don't analyze raw, un-cleaned tables.** If the data is normalized/coded/JSON, route to `data-transformation` first and analyze the clean output. +- **Don't report a number you didn't sanity-check.** No denominator, no null-check → no answer. +- **Don't silently pick a scope.** "Registered" vs "confirmed", all-time vs window — state which you used, or ask. +- **Don't build charts/dashboards here.** A written answer (and maybe one saved question) is the deliverable; if they want it visual, that's `visualization`. +- **Don't only count free-text.** Quote the real responses — the words carry the insight a count throws away. 
diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index c79e3f2..5e45612 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -1,6 +1,6 @@ --- name: robot-data-engineer -description: The front door for turning a database into something a non-technical person can actually use — clean tables, reusable definitions, and dashboards — all through the `mb` CLI. This skill is a light router: it works out where the user is (raw data? clean tables already? ready to chart?), sets up auth and how hands-on they want to be, then loads the right specialized skill to do the work. Load when someone wants to "make sense of my data", "build a data model", "go from raw data to a dashboard", "be my data analyst / data engineer", "set up analytics for X", or otherwise asks for the whole journey rather than one specific step. (Working title — name TBD before merge.) +description: The front door for turning a database into something a non-technical person can actually use — clean tables, reusable definitions, dashboards, and answers to real questions — all through the `mb` CLI. This skill is a light router — it works out where the user is (raw data? clean tables already? ready to chart? just need a question answered?), sets up auth and how hands-on they want to be, then loads the right specialized skill to do the work. Load when someone wants to "make sense of my data", "build a data model", "go from raw data to a dashboard", "answer questions about my data", "report on who registered / signed up / responded", "analyze X", "be my data analyst / data engineer", "set up analytics for X", or otherwise asks for the whole journey rather than one specific step. (Working title — name TBD before merge.) allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- @@ -8,13 +8,14 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion You're the **front door**, not the worker. 
Your job is to point the user at the right tool and get out of the way. The actual work lives in three specialized skills; you figure out which one the user needs right now, set up the shared context once, and hand off. Keep yourself small — the moment you know which skill to load, load it and let it drive. -The journey, end to end, is three stages: +The journey, end to end, is four stages: 1. **Raw data → clean tables** — the `data-transformation` skill. Takes a messy, normalized source database and builds a small set of wide, clean, analysis-ready tables. 2. **Clean tables → reusable definitions** — the `semantic-layer` skill. Turns those tables into segments (saved filters), measures (saved calculations), and metrics (official numbers) the whole team reuses. 3. **Tables/definitions → charts and dashboards** — the `visualization` skill. Builds the questions and dashboards people actually look at. +4. **Clean tables → answers and reports** — the `data-analysis` skill. Takes a real question ("who registered", "what did people say") and a clean table that holds the answer, runs the queries, sanity-checks them, and hands back a plain-language report. -Most users don't say which stage they want — they describe a goal. Your job is to map the goal to a stage, confirm you've got it right, and route. +Stages 3 and 4 are siblings, not sequential: charting and answering-in-prose are two things you can do with clean data — route to whichever the goal calls for. Most users don't say which stage they want — they describe a goal. Your job is to map the goal to a stage, confirm you've got it right, and route. --- @@ -54,6 +55,7 @@ Don't make the user name a stage. 
Peek at the instance and read their goal, then | "Clean up / flatten / make sense of" raw, normalized data; no clean tables yet | `data-transformation` | | Clean tables exist; "make this reusable", "define active customers / revenue / MRR officially", "so everyone uses the same definition" | `semantic-layer` | | Tables (and maybe definitions) exist; "chart this", "build a dashboard", "show me X over time" | `visualization` | +| Clean tables exist; "answer this question", "who registered", "what did people say", "analyze / report on / summarize X" (wants a written answer, not a chart) | `data-analysis` | | "Do the whole thing" / "set up analytics for X" from raw data | start at `data-transformation`, then continue down the journey (see below) | Load a skill with `mb skills get <name>`. Then **hand off** — the child owns its own flow, asking and stopping within its stage. Don't narrate the child's work or duplicate its steps. From 3a6494db5fd7865a1da565416bf244c7706a0040 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 09:22:56 -0600 Subject: [PATCH 09/31] Add shared contract to router; point children at it Hoist the cross-cutting rules every child skill must follow into a single Shared Contract section in robot-data-engineer: audience, jargon list (avoid normalize/grain; ERD/foreign key fine; explain wide/long on first use), PII handling (ask before showing rows; default to aggregates), capability limits (name what the CLI can't do instead of erroring into raw SQL), the autonomy slider, and the final hard stop. Each child (data-analysis, data-transformation, semantic-layer, visualization) gets a top-of-file up-pointer: a one-line summary plus an instruction to load the router's Shared Contract. The summary stands on its own so a directly-invoked child still gets the gist if the pointer is skipped. Drop the duplicated autonomy-slider prompt from semantic-layer, keeping only its stage-specific application of the modes.
--- skill-data/data-analysis/SKILL.md | 2 ++ skill-data/data-transformation/SKILL.md | 2 ++ skill-data/robot-data-engineer/SKILL.md | 25 +++++++++++++++++++++++++ skill-data/semantic-layer/SKILL.md | 13 ++++--------- skill-data/visualization/SKILL.md | 2 ++ 5 files changed, 35 insertions(+), 9 deletions(-) diff --git a/skill-data/data-analysis/SKILL.md b/skill-data/data-analysis/SKILL.md index a47ceb5..ca2b205 100644 --- a/skill-data/data-analysis/SKILL.md +++ b/skill-data/data-analysis/SKILL.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Data Analysis +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: audience is a non-technical user, so no database jargon (skip "normalize"/"grain"; ERD/foreign key are fine; explain "wide"/"long" the first time you use them). Ask before showing PII row-by-row (names, emails, phones) — default to aggregates. When asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL. Honor the autonomy mode the user picked. Full text and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + The user has a question and clean data that already holds the answer. Your job: find the answer, check it's right, and hand it back in plain language. You're an analyst, not a dashboard builder — the deliverable is a **trustworthy written answer**, optionally backed by a saved question they can re-open. This skill assumes the tables are already clean (wide, human-readable). If they're raw and normalized — lots of `*_field`/`*_choice` lookups, coded columns, JSON blobs — stop and route to `data-transformation` first; don't analyze on top of a mess. 
diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 1f953bf..fc42d18 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Data Transformation +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: ask before showing PII row-by-row (names, emails, phones) — default to aggregates; when asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL; honor the autonomy mode the user picked. The jargon rules are spelled out in detail below (**Who you're talking to**). Full contract and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + Your job: take a raw source database — usually normalized, often Fivetran-synced from some SaaS tool — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world *thing* the data is about, built as Metabase **transforms** the user can inspect. Drive everything through the `mb` CLI. First load the skills you'll need: diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index 5e45612..14a1ee9 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -39,6 +39,31 @@ Two things you always own, regardless of mode and regardless of which child ran: --- +## Shared Contract + +This is the single source for the rules every child skill follows. Children carry a one-line summary and point back here; this is the full text. When a child runs directly (loaded without going through this router), it's told to read this section first — so treat it as the contract for the whole family, not just the router. 
+ +**Who you're talking to.** A non-technical user who knows their domain well — they understand the business (events, customers, invoices, whatever it is) but not databases. Talk in their terms. + +**Jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, normalize, denormalize, surrogate key, materialize — and prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is". But don't overdo it: they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key, cardinality. **wide / long** are borderline — usable, but explain them the first time ("one row per person, with a column for each answer"). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. + +**PII.** Survey and registration data holds personal information — names, emails, phone numbers, emergency contacts. Before showing it row-by-row (a roster, a sample of rows), ask whether to display, aggregate, or mask. Default to aggregate counts/breakdowns unless the user wants the actual list. + +**Capability limits — know what you can't do.** The `mb` CLI can author and query content, but it isn't the whole Metabase product. When the user asks for something outside its reach — alerts/subscriptions, applying a segment as a dashboard filter, scheduled emails, permissions UI — say so plainly and offer the nearest thing the CLI *can* do. Don't attempt it, hit a server error, and surface raw SQL or a stack trace; name the limit up front. + +**Autonomy slider.** Ask once, up front (the router does this in Setup), then remember it for the whole session — children read the chosen mode, they don't re-ask: + +> Quick thing — how hands-on do you want to be? +> • **Check with me on everything** — I'll run each step past you first. +> • **Balanced** (default) — I'll decide the obvious stuff and ask only when it matters. 
+> • **Just go** — I'll do what makes sense and show you the result. + +**When genuinely unsure, ask — never assume.** + +**The final hard stop.** Before the user treats anything as done, give a plain-language recap of what now exists and hand them something to open and eyeball. + +--- + ## Work out where they are, then route Don't make the user name a stage. Peek at the instance and read their goal, then meet them where they are. diff --git a/skill-data/semantic-layer/SKILL.md b/skill-data/semantic-layer/SKILL.md index 5688784..6d0a6a6 100644 --- a/skill-data/semantic-layer/SKILL.md +++ b/skill-data/semantic-layer/SKILL.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Semantic Layer +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: audience is a non-technical user, so no database jargon (skip "normalize"/"grain"; ERD/foreign key are fine; explain "wide"/"long" the first time you use them). Ask before showing PII row-by-row (names, emails, phones) — default to aggregates. When asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL. Honor the autonomy mode the user picked. Full text and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + Your job: take the clean, analysis-ready tables that already exist and turn the **questions people keep asking** into **shared, reusable definitions** — so "active customer", "net revenue", and "monthly recurring revenue" mean one thing across the whole organization, not five slightly-different things in five people's saved questions. You build three kinds of reusable thing. These are real Metabase features with real names — **use the Metabase names** (segment, measure, metric) and teach them to the user as you go. They're product vocabulary, not jargon. 
Pair the name with a plain gloss the first time, then use it freely: @@ -39,16 +41,9 @@ A **non-technical user who knows their domain well.** They know the business — --- -## Autonomy — let the user set how much you check in - -People differ on how much they want to be asked. Offer a **slider** once, near the start, in plain terms, then honor it for the rest of the session: - -> Quick thing — how hands-on do you want to be? -> • **Check with me on everything** — I'll run each definition past you before I build it. -> • **Balanced** (default) — I'll decide the obvious stuff myself and ask you only when it genuinely matters. -> • **Just go** — build what makes sense and show me the whole set at the end. +## Autonomy — honor the mode the user set -Map it to behavior: +The user already picked an autonomy mode (the router's Shared Contract asks the slider once, up front — don't re-ask). Apply it to building definitions: | Mode | What you do | | ----------------------- | ----------------------------------------------------------------------------------------------------------- | diff --git a/skill-data/visualization/SKILL.md b/skill-data/visualization/SKILL.md index 6cb6fc8..328d671 100644 --- a/skill-data/visualization/SKILL.md +++ b/skill-data/visualization/SKILL.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Visualization: pick the chart, then set it +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: audience is a non-technical user, so no database jargon (skip "normalize"/"grain"; ERD/foreign key are fine; explain "wide"/"long" the first time you use them). Ask before showing PII row-by-row (names, emails, phones) — default to aggregates. When asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL. Honor the autonomy mode the user picked. 
Full text and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + A card has two presentation fields alongside its `dataset_query`: - **`display`** — the chart type (`bar`, `line`, `pie`, `scalar`, `map`, `table`, …). One closed set; pick from the enum below. From 964b2722d43bef90b2ee8695066e0bd09be07c68 Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 12:26:45 -0600 Subject: [PATCH 10/31] Robot Data Analysts should give more context --- skill-data/robot-data-engineer/SKILL.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index 14a1ee9..f7811b3 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -60,6 +60,18 @@ This is the single source for the rules every child skill follows. Children carr **When genuinely unsure, ask — never assume.** +**Questions must carry their own context.** The user may not have been reading along — people hit go, step away, and skim the stretches where you think out loud. So whenever you ask for input, the context the question depends on goes *right before it*, not as a back-reference. "Given the mismatch I found earlier, what would you like to do?" forces a scroll-back; lead with a short recap instead: + +> I have a question for you — quick recap so it makes sense: +> +> - I found a mismatch in ... +> - This matters because ... +> - Here's what I was thinking, but I need to check ... +> +> The question. + +Recap only the few points the question turns on — enough to answer cold, not a replay of everything you did. + **The final hard stop.** Before the user treats anything as done, give a plain-language recap of what now exists and hand them something to open and eyeball. 
--- From 152bc54ba64f3de6e264c706418c790a8bec1b7e Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 12:00:43 -0600 Subject: [PATCH 11/31] Harden robot-data-engineer skills from demo feedback Add three cross-cutting rules to the router's Shared Contract, drawn from two live demo runs (Swoogo, Luma): - Permission-denied discipline: on a denied query, stop -- never silently substitute a different readable table and pass its numbers off as the answer (the incident where an Account-table question got answered with Salesforce data). Diagnose the likely cause in plain terms, offer to search for a readable look-alike, surface any match as a confirm question, and hand control back -- no GRANT statements, no profile-switching Claude can't reliably execute. - Scratch files go in ./.scratch, never /tmp (better perms, persists, user-reviewable). Swept the /tmp examples in core, transform, document, and mbql to match. - Talking to the user: don't reference things they never saw, assume they read only the last ~30 lines, give questions full context, keep permission requests to one plain sentence. Rework the router's discovery section to ask the user where the data lives before crawling (asymmetry: name a db -> ask the schema; name a table -> ask the db), give the efficient command ladder, and offer a sync when a table is missing. De-duplicate auth: core's Auth & profiles section is the single source; the router keeps one line (it's the front door, may run before core loads) and data-transformation defers to core. 
--- skill-data/core/SKILL.md | 6 +++-- skill-data/data-transformation/SKILL.md | 2 +- skill-data/document/SKILL.md | 8 +++---- skill-data/mbql/SKILL.md | 2 +- skill-data/robot-data-engineer/SKILL.md | 32 +++++++++++++++++++++---- skill-data/transform/SKILL.md | 16 ++++++------- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index e9101f6..10994d2 100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -97,14 +97,16 @@ Verbs that take a payload accept it from one of four sources, **first non-empty Picking exactly one is required; passing two of `--body` + `--file` + `--stdin` is rejected with a `ConfigError`. ```bash -cat > /tmp/body.json <<'EOF' +cat > ./.scratch/body.json <<'EOF' { ... } EOF -mb create --file /tmp/body.json --profile --json +mb create --file ./.scratch/body.json --profile --json ``` Single-quoted `'EOF'` prevents the shell from interpolating `$vars` inside the JSON. +Write these working files to **`./.scratch`** in the current directory (`mkdir -p ./.scratch` first), never `/tmp` — better permissions, they persist across the session, and the user can open and review them. + ## Discover the full surface: `mb __manifest` For the canonical, machine-readable inventory of every command — name, description, per-command `details`, examples, every flag with type and default, and the output JSON Schema — run: diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index fc42d18..bb7f91c 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -18,7 +18,7 @@ mb skills get mbql # if you build transform queries in MBQL mb skills get transform # creating/running transforms, run inspection ``` -Authentication is the user's job. Check `mb auth list --json`; use the one profile if there's one, ask which if there are several, ask them to log in if there are none. Pass `--profile ` to every command. 
That profile's `url` is the instance's base URL — build every browser link below from it, so what you open always matches the instance the CLI is hitting. +Authentication is the user's job — pick the profile per `core`'s **Auth & profiles** section and pass `--profile ` to every command. That profile's `url` is the instance's base URL — build every browser link below from it, so what you open always matches the instance the CLI is hitting. If you are making transforms, use the transform skill. diff --git a/skill-data/document/SKILL.md b/skill-data/document/SKILL.md index c3b9862..381fab9 100644 --- a/skill-data/document/SKILL.md +++ b/skill-data/document/SKILL.md @@ -145,10 +145,10 @@ Each entry in `cards` needs at least `{name, dataset_query, display, visualizati `update` replaces the whole `document` body, so the safe loop is **read → edit → write**. A fetched body already carries `_id`s on its id-bearing nodes, so preserve them — only mint new ones for id-bearing nodes you add: ```bash -mb document get --full --profile --json | jq '.document' > /tmp/body.json -# edit /tmp/body.json (add nodes — give each new id-bearing node a fresh `mb uuid` _id) … -jq -n --slurpfile d /tmp/body.json '{document: $d[0]}' > /tmp/patch.json -mb document update --file /tmp/patch.json --profile --json +mb document get --full --profile --json | jq '.document' > ./.scratch/body.json +# edit ./.scratch/body.json (add nodes — give each new id-bearing node a fresh `mb uuid` _id) … +jq -n --slurpfile d ./.scratch/body.json '{document: $d[0]}' > ./.scratch/patch.json +mb document update --file ./.scratch/patch.json --profile --json ``` Don't hand-merge a partial node tree into a live document — pull the current `document`, mutate the array, and PUT the whole thing back. To rename without touching the body, patch only `name`: `mb document update --body '{"name":"New title"}'`. 
diff --git a/skill-data/mbql/SKILL.md b/skill-data/mbql/SKILL.md index 3518d68..8c19ace 100644 --- a/skill-data/mbql/SKILL.md +++ b/skill-data/mbql/SKILL.md @@ -75,7 +75,7 @@ mb uuid --count 2 --json # mint only the clauses you actually reference `mb query` is the canonical authoring surface. Three modes: ```bash -mb query --print-schema --profile <name> > /tmp/mbql-schema.json # 1. fetch the schema +mb query --print-schema --profile <name> > ./.scratch/mbql-schema.json # 1. fetch the schema mb query --file q.json --dry-run --profile <name> # 2. validate, no network mb query --file q.json --profile <name> --json # 3. validate + run ``` 
When the user asks for something outside its reach — alerts/subscriptions, applying a segment as a dashboard filter, scheduled emails, permissions UI — say so plainly and offer the nearest thing the CLI *can* do. Don't attempt it, hit a server error, and surface raw SQL or a stack trace; name the limit up front. +**Permission denied — stop, diagnose, offer a way back.** When a query fails with "permission denied", the one thing you must never do is quietly query a *different* readable table and present its numbers as the answer (that's how a question about the Account table gets silently answered with Salesforce data). Instead, in order: + +1. **Stop.** Don't substitute another table and pass it off as the answer. +2. **Surface and diagnose in plain, friendly terms.** Name what was denied and the likely reason. The usual three: *right table, wrong login* — it exists, but this CLI login isn't granted it (common on staging/isolated setups — a configuration thing, not a problem with their data); *right name, wrong copy* — a readable table of the same or similar name lives in another schema or database; *name slightly off* — what they called it isn't quite the real table name. For example: "I can't read `analytics.account` — this login doesn't have access to it. That's usually a staging-permissions thing, not a problem with your data." +3. **Offer to search — don't auto-crawl.** Ask first: "Want me to look for a table with a similar name that this login *can* read?" Only on yes, run `mb search ` / `mb table list`, and surface any match as a **confirm question**, never as a substituted answer: "There's `dbt_models.account` I can read — did you mean that one?" +4. **Hand control back.** Don't propose or run a fix you can't reliably execute — no `GRANT` statements, no profile-switching. The recovery is the user's call. + +**Scratch files.** Working files — transform/query/patch JSON bodies, notes — go in `./.scratch` in the current working directory, **never `/tmp`**. 
Better permissions, it persists across the session, and the user can open and review it. `mkdir -p ./.scratch` if it isn't there yet. + +**Talking to the user.** Four habits, because the last few demo runs slipped on them: + +- **Don't reference things they never saw.** If *you* built a helper table or ran a probe earlier, don't name it as if they were watching — reintroduce it in their terms, or don't mention it. +- **Assume they read only the last ~30 lines.** Don't lean on context from far up the conversation; restate what they need to act on your question. +- **Questions carry their own context.** Ask the whole question, not shorthand — "Which status counts as registered — confirmed only, or everyone?" not "confirmed or all?". +- **Plain permission requests.** Don't paste a wall of SQL or JSON and ask "run this?". Summarize the action in one sentence — "Want me to add a column linking registrations to accounts?" — and offer to show the details if they ask. + **Autonomy slider.** Ask once, up front (the router does this in Setup), then remember it for the whole session — children read the chosen mode, they don't re-ask: > Quick thing — how hands-on do you want to be? @@ -78,12 +94,18 @@ Recap only the few points the question turns on — enough to answer cold, not a ## Work out where they are, then route -Don't make the user name a stage. Peek at the instance and read their goal, then meet them where they are. +Don't make the user name a *stage* — but do find out *where their data lives* before you go looking for it. + +**Ask before you crawl.** If you don't already know which database, schema, or table the user means, ask — one plain question short-circuits a dozen tool calls. The asymmetry: if they name a **database**, ask which **schema**; if they name a **table**, ask which **database** it's in. "If you don't know, no problem — I'll look" is the fallback, not the opening move. Only crawl the instance when the user genuinely doesn't know where things are. 
+ +**When you do crawl — the efficient ladder** (cheap, narrowest-first; never pull whole-warehouse rollups): -**Detect the starting state** (cheap — don't pull whole-warehouse rollups): +- Walk down: `mb db list` → `mb db schemas ` → `mb db schema-tables ` → `mb table list [--db-id]` → `mb table fields ` / `mb table metadata `. +- Have a *name* to look for rather than a tree to walk? Use `mb search [--models] [--db-id]` instead of crawling. +- Need to know what's actually in a column? `mb field summary ` (row/distinct counts) and `mb field values ` (sample values). +- **If a database looks freshly connected, or a table the user expects isn't showing up, offer to sync** — `mb db sync-schema --wait` — before concluding the table doesn't exist. -- List databases/schemas (`mb db …`, `mb table list`). Are there raw, normalized, SaaS-synced-looking tables (lots of tables, coded columns, `*_field`/`*_choice` lookups)? Or are there already wide, clean, human-readable tables? -- Are there already segments/measures/metrics (`mb segment list`, `mb measure list`, `mb card list`)? Existing dashboards (`mb dashboard list`)? +**Then read the shape to pick a stage.** Are there raw, normalized, SaaS-synced-looking tables (lots of tables, coded columns, `*_field`/`*_choice` lookups)? Or already wide, clean, human-readable ones? Any segments/measures/metrics (`mb segment list`, `mb measure list`, `mb card list`) or dashboards (`mb dashboard list`)? 
**Map goal + state to a skill:** diff --git a/skill-data/transform/SKILL.md b/skill-data/transform/SKILL.md index 4008359..a512688 100644 --- a/skill-data/transform/SKILL.md +++ b/skill-data/transform/SKILL.md @@ -24,7 +24,7 @@ For an **MBQL 5** `source.query` (`lib/type: "mbql/query"`), the body shape, the ## Create + run (native SQL) ```bash -cat > /tmp/transform.json <<'EOF' +cat > ./.scratch/transform.json <<'EOF' { "name": "user_counts_by_signup_year", "description": "Sample transform: counts users by year of signup", @@ -47,7 +47,7 @@ cat > /tmp/transform.json <<'EOF' } EOF -TRANSFORM_ID=$(mb transform create --file /tmp/transform.json --profile --json | jq -r '.id') +TRANSFORM_ID=$(mb transform create --file ./.scratch/transform.json --profile --json | jq -r '.id') mb transform run "$TRANSFORM_ID" --wait --profile --json ``` @@ -131,12 +131,12 @@ Right shape — patch only what changes: mb transform update --body '{"name":"renamed"}' --profile --json # Rewrite the SQL only: -cat > /tmp/patch.json <<'EOF' +cat > ./.scratch/patch.json <<'EOF' { "source": { "type": "query", "query": { "type": "native", "database": , "native": { "query": "SELECT … FROM public.orders" } } } } EOF -mb transform update --file /tmp/patch.json --profile --json +mb transform update --file ./.scratch/patch.json --profile --json # Change tag membership (note: tag_ids, not tags): mb transform update --body '{"tag_ids":[1,3]}' --profile --json @@ -148,7 +148,7 @@ If you really must round-trip, project to the writable subset: mb transform get --full --profile --json \ | jq '{name, description, source, target, run_trigger, tag_ids, collection_id, owner_user_id, owner_email} | with_entries(select(.value != null))' \ - > /tmp/patch.json + > ./.scratch/patch.json ``` ## Iterating on a failing transform @@ -163,17 +163,17 @@ Recipe: ```bash # 1. 
Try once -ID=$(mb transform create --file /tmp/t.json --profile --json | jq -r '.id') +ID=$(mb transform create --file ./.scratch/t.json --profile --json | jq -r '.id') mb transform run "$ID" --wait --profile --json # → failed # 2. Fix the body in place; PATCH only what changed. # Source-only patch — keeps name, target, tags untouched on the server. -cat > /tmp/source-patch.json <<'EOF' +cat > ./.scratch/source-patch.json <<'EOF' { "source": { "type": "query", "query": { "type": "native", "database": , "native": { "query": "" } } } } EOF -mb transform update "$ID" --file /tmp/source-patch.json --profile --json +mb transform update "$ID" --file ./.scratch/source-patch.json --profile --json # 3. Re-run mb transform run "$ID" --wait --profile --json # → succeeded From 68999343d8b04551dae0f742e0643e6b0f3dd626 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 12:44:00 -0600 Subject: [PATCH 12/31] Adopt skillsaw; clear all 17 lint warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire skillsaw (uvx) as a deterministic linter for the skill collection and clear every warning it reported: - Content quality: reword two weak-language hedges (ideally/correctly) to concrete behavior; flip the two negative-only "Don't" items (mbql, robot-data-engineer) to lead with the positive action. - Descriptions: compress the four over-long ones (robot-data-engineer, mbql, semantic-layer, data-analysis) under the 1024-char / 200-token limits, keeping the distinctive trigger phrases and dropping only redundant ones. No unquoted colon-space (would break frontmatter parse). - Bodies: a précis pass over the seven over-budget skills -- cut restated lead-ins, filler transitions, emphasis padding, and prose that merely restated an adjacent code block. Every rule, command, footgun, and worked example is kept; the dense skills were already mostly substance. 
Add .skillsaw.yaml pinned to 0.11.4 with an honest token ceiling (skill.warn 5100 -- above the largest leaf skill's de-fluffed floor, still catching real future bloat) and skill-description.warn 200. Add a strict skillsaw job to the Lint workflow. --- .github/workflows/lint.yml | 9 ++++++ .skillsaw.yaml | 23 +++++++++++++++ skill-data/core/SKILL.md | 36 +++++++++-------------- skill-data/data-analysis/SKILL.md | 2 +- skill-data/data-transformation/SKILL.md | 18 ++++++------ skill-data/mbql/SKILL.md | 38 ++++++++++++------------- skill-data/robot-data-engineer/SKILL.md | 28 +++++++++--------- skill-data/semantic-layer/SKILL.md | 14 ++++----- skill-data/transform/SKILL.md | 16 +++++------ skill-data/visualization/SKILL.md | 6 ++-- 10 files changed, 107 insertions(+), 83 deletions(-) create mode 100644 .skillsaw.yaml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 42eb365..1f7a8ca 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -35,3 +35,12 @@ jobs: bun-version: latest - run: bun install - run: bun run format:check + + skills: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: stbenjam/skillsaw@v0 + with: + path: skill-data + strict: true diff --git a/.skillsaw.yaml b/.skillsaw.yaml new file mode 100644 index 0000000..53d5328 --- /dev/null +++ b/.skillsaw.yaml @@ -0,0 +1,23 @@ +# skillsaw configuration — https://github.com/stbenjam/skillsaw +# Lints the skill collection under skill-data/. Pinned so a skillsaw +# release can't silently change token math and break CI. + +version: "0.11.4" + +rules: + context-budget: + enabled: true + severity: warning + limits: + # Skill bodies are loaded into Claude's context when a skill fires. + # The default warn (3000) is tuned for lean skills; ours are dense + # reference/strategy skills whose de-fluffed floor is higher. 
The + # always-resident skills (core, robot-data-engineer) are the tightest + # of the set; the larger ones (data-transformation, semantic-layer, + # mbql) are leaf skills loaded for a single stage, where the extra + # tokens are genuine guidance, not fluff. 5100 clears the largest + # honest floor while still catching real future bloat. + skill: + warn: 5100 + skill-description: + warn: 200 diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index 10994d2..bddc3e8 100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -6,7 +6,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # metabase-cli (core) -The official Metabase CLI (`mb`) drives a Metabase instance over its REST API. It covers auth, list/get/create/update/delete on every resource, query and transform execution, content search, git-sync (representations ↔ instance), and entity-id translation. +The official Metabase CLI (`mb`) drives a Metabase instance over its REST API: auth, list/get/create/update/delete on every resource, query and transform execution, content search, git-sync (representations ↔ instance), and entity-id translation. Top-level command groups (run `mb --help` to discover verbs): @@ -15,7 +15,7 @@ auth | db | table | field | query | card | dashboard | snippet | segment | measu document | transform | transform-job | setting | search | git-sync | setup | eid | uuid | upgrade | skills ``` -The patterns below — auth, flag conventions, output flags, body input — apply across **every** group. Per-command flags, examples, and output schemas live in `mb __manifest` (see below). A few flows have their own specialized skills; load them on demand (see "Specialized skills"). Authoring any query body (cards, transforms, measures, segments, ad-hoc `mb query`) is one — load `mbql` whenever you build MBQL by hand. When a **question** (card) needs a query, prefer MBQL over native SQL — it's portable across warehouse engines and the CLI pre-flight-validates it. 
Try it first, but don't force it: fall back to native SQL when MBQL can't express the query, or when an MBQL body keeps failing server-side and you can't resolve it. +The patterns below — auth, flag conventions, output flags, body input — apply across **every** group. Per-command flags, examples, and output schemas live in `mb __manifest` (see below). A few flows have their own specialized skills (see "Specialized skills"). When a card needs a query, prefer MBQL over native SQL (portable, pre-flight-validated) — load `mbql`; fall back to native SQL when MBQL can't express it. ## Auth & profiles @@ -29,11 +29,11 @@ mb auth status --json # → {profile, present, url} for the d mb auth status --profile --json # → status of a specific profile ``` -`auth list` is the primary enumeration path — one call returns every configured profile with sanitized URL, an `authenticated` flag, and a probe `status` (`ok` / `auth-failed` / `network-error` / `server-error` / `not-probed`). Use it before asking the user which profile to pick. If it returns an empty `data: []`, ask the user to run `mb auth login` themselves (see the policy above) and tell you the profile name. `auth status` is a single-profile health probe when you already know the name. +`auth list` is the primary enumeration path — one call returns every configured profile with sanitized URL, an `authenticated` flag, and a probe `status` (`ok` / `auth-failed` / `network-error` / `server-error` / `not-probed`). Use it before asking which profile to pick. If it returns an empty `data: []`, ask the user to run `mb auth login` themselves (see the policy above) and tell you the profile name. `auth status` is a single-profile health probe when you already know the name. ### Pick the profile to use -If exactly one profile is configured and the user's intent doesn't disambiguate, use it. If multiple profiles exist and the user hasn't named one, ask via `AskUserQuestion`, presenting the names from `auth list`. 
Once a name is established, pass `--profile ` to **every** subsequent command. Profile names are arbitrary local labels — `prod`, `staging` — let the user pick. +If exactly one profile is configured and intent doesn't disambiguate, use it. If multiple exist and the user hasn't named one, ask via `AskUserQuestion`, presenting the names from `auth list`. Once a name is established, pass `--profile ` to **every** subsequent command. Profile names are arbitrary local labels — `prod`, `staging` — let the user pick. ## Flag conventions @@ -52,14 +52,12 @@ If exactly one profile is configured and the user's intent doesn't disambiguate, ### Some outputs are JSON envelopes, not bare strings -A handful of "lookup" verbs return a JSON object even when you only want a single field. `mb setting get ` returns `{"key": "...", "value": ...}`, not the bare value. Don't drop them raw into another flag — extract: +A handful of "lookup" verbs return a JSON object even for a single field. `mb setting get ` returns `{"key": "...", "value": ...}`, not the bare value. Extract before reusing: ```bash VALUE=$(mb setting get --json | jq -r '.value') ``` -If you find yourself piping a `--json` envelope straight into another flag and the receiving command rejects it, this is what happened. - ## Output Every list/get verb supports the same output flags: @@ -94,7 +92,7 @@ Verbs that take a payload accept it from one of four sources, **first non-empty 3. stdin (auto-detected when piped, or explicit `--stdin` where supported) 4. positional argument -Picking exactly one is required; passing two of `--body` + `--file` + `--stdin` is rejected with a `ConfigError`. +Exactly one required; passing two of `--body` + `--file` + `--stdin` is rejected with a `ConfigError`. ```bash cat > ./.scratch/body.json <<'EOF' @@ -105,17 +103,17 @@ mb create --file ./.scratch/body.json --profile --json Single-quoted `'EOF'` prevents the shell from interpolating `$vars` inside the JSON. 
-Write these working files to **`./.scratch`** in the current directory (`mkdir -p ./.scratch` first), never `/tmp` — better permissions, they persist across the session, and the user can open and review them. +Write these working files to **`./.scratch`** in the current directory (`mkdir -p ./.scratch` first), never `/tmp` — better permissions, they persist across the session, and the user can review them. ## Discover the full surface: `mb __manifest` -For the canonical, machine-readable inventory of every command — name, description, per-command `details`, examples, every flag with type and default, and the output JSON Schema — run: +The canonical, machine-readable inventory of every command — name, description, per-command `details`, examples, every flag with type and default, and the output JSON Schema: ```bash mb __manifest ``` -The leading `__` hides it from `--help`, but it's stable. Reach for it instead of `--help` per command. It pairs with `jq`: +The leading `__` hides it from `--help`, but it's stable. Reach for it instead of `--help` per command — to enumerate verbs, validate flag names before constructing a command, or read an output schema before parsing. Pairs with `jq`: ```bash mb __manifest | jq -r '.commands[].command' # every command name @@ -124,30 +122,28 @@ mb __manifest | jq '.commands[] | select(.command == "card query") | .args' mb __manifest | jq '.commands[] | select(.command == "card list") | .outputSchema' # output schema before parsing ``` -Use it to (a) enumerate verbs, (b) validate flag names before constructing a command, (c) read an output schema before parsing. - ## Resource quirks worth memorizing -Routine verb shapes (list / get / create / update), every flag, and output JSON Schemas live in `mb __manifest` — pull them on demand. Below is only what the manifest does _not_ tell you: the footguns and non-obvious behaviors. 
+Routine verb shapes (list / get / create / update), every flag, and output JSON Schemas live in `mb __manifest` — pull on demand. Below is only what the manifest does _not_ tell you: footguns and non-obvious behaviors. - **db traversal vs. rollup.** Default to granular: `database list` → `database schemas ` → `database schema-tables ` → `table get --include fields`. The rollup endpoints (`database get --include tables.fields`, `database metadata `) pull megabytes and blow the context window on any real warehouse — use them only on a small/dev db. `sync-schema` / `rescan-values` queue async work and return `{status:"ok"}` immediately; `sync-schema --wait` blocks until `initial_sync_status: complete`. - **table fields.** `table get` never returns fields on its own — pass `--include fields` (compact) or use `table fields ` (list envelope). `table metadata ` adds FKs + dimensions (heavier). `table update` patches table-level metadata only; physical columns aren't editable here. - **field has no `list`.** Fields are per-table — get them via `table get --include fields`. Never enumerate fields across a whole db (context blow-up). `field summary` is live cardinality `{field_id, count, distincts}`; `field values` is the cached distinct set (`has_more_values: true` ⇒ truncated cache). `field update` patches metadata only; `base_type` isn't editable. - **card.** `dataset_query` is the **flat** `mbql/query` value, not a legacy `{type:"query",query:…}` envelope (→ `mbql` skill). `--export-format csv|xlsx` streams the raw export (pipe to a file), bypassing the JSON envelope. `archive` is the only delete; unarchive with `update --body '{"archived":false}'`. `visualization_settings` keys are scoped by `display` and aren't pre-flighted — see the `viz` skill. 
-- **dashboard.** Dashcards round-trip through `PUT /api/dashboard/:id` (no per-dashcard endpoint): `update-dashcard ` patches one safely; `update --body '{"dashcards":[…]}'` replaces the whole set (omitted ids are deleted server-side; use negative ids for new cards). `create` accepts the **same** `dashcards` array in its initial body, so you can lay out the whole dashboard in one call — negative ids for new cards, and `card_id:null` plus a `visualization_settings.virtual_card` block (`{display:"text"|"heading"|"link"|…}`) for non-question cards. `create`/`update` pre-flight every positive `card_id` against live server state and exit **2** with `{ok:false,errors:[…]}` on a bad ref — non-bypassable (no `--skip-validate`). `dashboard get ` (or `--full`) hydrates dashcards/tabs; `list` omits them. **Dashcard geometry: the grid is 24 columns wide.** Each dashcard's `{col, row, size_x, size_y}` is in grid units — `col` (0-indexed, left edge) and `size_x` are columns, `row`/`size_y` are rows; **full-width is `size_x: 24`** (`size_x: 12` is half a row — the usual cause of a card that only fills half the width, since it's a common per-chart default). Keep `col + size_x ≤ 24`, start each card's `col` at 0 for a full-width stack, and don't overlap cards (the server stores whatever you send — it won't auto-fix collisions). +- **dashboard.** Dashcards round-trip through `PUT /api/dashboard/:id` (no per-dashcard endpoint): `update-dashcard ` patches one safely; `update --body '{"dashcards":[…]}'` replaces the whole set (omitted ids are deleted server-side; negative ids for new cards). `create` accepts the **same** `dashcards` array in its initial body — lay out the whole dashboard in one call: negative ids for new cards, and `card_id:null` plus a `visualization_settings.virtual_card` block (`{display:"text"|"heading"|"link"|…}`) for non-question cards. 
`create`/`update` pre-flight every positive `card_id` against live server state and exit **2** with `{ok:false,errors:[…]}` on a bad ref — non-bypassable (no `--skip-validate`). `dashboard get ` (or `--full`) hydrates dashcards/tabs; `list` omits them. **Dashcard geometry: the grid is 24 columns wide.** Each dashcard's `{col, row, size_x, size_y}` is in grid units — `col` (0-indexed, left edge) and `size_x` are columns, `row`/`size_y` are rows; **full-width is `size_x: 24`** (`size_x: 12` is half a row — the usual cause of a card filling only half the width, since it's a common per-chart default). Keep `col + size_x ≤ 24`, start each card's `col` at 0 for a full-width stack, and don't overlap cards (the server stores whatever you send — it won't auto-fix collisions). - **snippet `--archived` is a swap, not a union** — list returns _either_ active _or_ archived rows, never both. (Same shape for `--filter archived` on dashboard/collection.) - **segment / measure** `update` and `archive` require a non-blank `revision_message` (audit-logged); the CLI does not synthesize it on `update`. `archive` defaults to `"Archived via mb CLI"` — override with `--revision-message`. `definition` is a flat MBQL clause (→ `mbql` skill): segment = a filter, measure = exactly one aggregation. - **collection ``** accepts four forms only — positive int, `root`, `trash`, or a 21-char entity_id — anything else is a client-side `ConfigError`. `collection items` auto-paginates (cap with `--limit`, which then omits `total`). `collection tree` is **JSON-only** — `--format text` is rejected. - **setting set** parses the value as **strict JSON**: a string is `'"value"'` (inner quotes), booleans `true`/`false`, numbers bare. Wrong quoting silently errors — confirm with `setting get ` after. `setting get --json` works on every value type (it wraps bare-text responses into `{key, value}`). - **search vs. 
list.** For plain enumeration of cards/dashboards/collections use the dedicated `… list` verbs; reach for `search --models ` only for ranking against a query string or a cross-resource lookup. - **transform.** Iterate with `transform update `, never `delete` + `create` — keeps the row, `entity_id`, materialized table, and YAML filename (avoids `_2` suffixes and noisy git history). `transform run` needs `--wait` (or `--sync`, which also waits for the run's output table to register and returns `target_table_id`) or you get only `{run_id, final:null}`. (→ `transform` skill.) -- **setup is one-shot.** `mb setup` walks `/api/setup` for a **fresh** instance only — it errors against an already-configured one. Mostly for bootstrapping local / e2e instances. +- **setup is one-shot.** `mb setup` walks `/api/setup` for a **fresh** instance only — errors against an already-configured one. Mostly for bootstrapping local / e2e instances. - **eid** translates a string entity id → numeric id: `mb eid --model --json` (EIDs are a positional used with `--model`; or pass `--body '{"entity_ids":{"card":["…"]}}'`). Entity ids are NanoIDs that can start with `-`, which the positional form misreads as a flag (shell quotes don't help — the `-` survives into argv). For an id that may start with `-`, use `--body` — the id is a JSON string value, immune to flag parsing: `mb eid --body '{"entity_ids":{"card":["-…"]}}'`. Useful when an external system hands you an entity id and a verb needs the numeric one. - **query / uuid.** `mb query` is the ad-hoc MBQL surface (`--print-schema` → `--dry-run` → run); `mb uuid --count ` mints the `lib/uuid` values every MBQL 5 clause needs. Both workflows live in the `mbql` skill. ## Specialized skills (load on demand) -This core file is enough for any single-command task. Load the relevant skill **proactively** when intent matches — don't wing an MBQL body, a transform body, or the git-sync workflow from this overview alone. Load each via `mb skills get `. 
+This core file is enough for any single-command task. Load the relevant skill **proactively** when intent matches — don't wing an MBQL body, a transform body, or the git-sync workflow from this overview alone. Load via `mb skills get `. - **`mbql`** — authoring or fixing any MBQL query body: `mb query`, a card `dataset_query`, a transform `source.query`, a measure/segment `definition`, "aggregate and group by", reading `--dry-run` errors. The query-body reference. - **`viz`** — choosing a card's `display` and authoring `visualization_settings`: "make it a bar chart", "set the pie dimension/metric", "format this column as currency", "the card renders as a table instead of a chart". The presentation counterpart to `mbql`. @@ -161,9 +157,5 @@ If a task spans more than one, load each. Specialized skills assume the conventi ## Don't -- **Don't run `mb auth login` for the user** — authentication is theirs (see §Auth). - Don't paste credentials or warehouse passwords in chat. Have the user run the storing command. -- Don't put `--profile` before the verb chain — the CLI parses it as a subcommand and errors out. -- Don't omit `--wait` on `transform run` / `git-sync import` for interactive flows; the next step will race the operation. -- Don't drop a JSON-envelope verb's output raw into another flag. Extract with `--json | jq -r '.'`. -- Don't add a third-party HTTP library or shell into `curl` against `/api/...` when a `mb ` exists — that bypasses retries, schema validation, and credential redaction. +- Don't shell into `curl` against `/api/...` (or add an HTTP library) when a `mb ` exists — that bypasses retries, schema validation, and credential redaction. 
diff --git a/skill-data/data-analysis/SKILL.md b/skill-data/data-analysis/SKILL.md index ca2b205..7a04ede 100644 --- a/skill-data/data-analysis/SKILL.md +++ b/skill-data/data-analysis/SKILL.md @@ -1,6 +1,6 @@ --- name: data-analysis -description: Answer real questions from clean, analysis-ready tables and hand back a plain-language report — not a chart-building task, an answer-finding one. Claude reads the available tables, turns the user's question ("who registered", "what did people say they want", "which option won") into queries, runs them against the live instance, sanity-checks the numbers, and writes up findings the user can read and trust. Works over tables that are already clean (wide, human-readable) — survey/registration answers, event signups, customer lists, anything where the user has a question and the data already holds the answer. Use whenever someone wants to "answer questions about my data", "report on who registered / signed up / responded", "what did people say", "analyze X", "explore this data", "find patterns", "summarize the responses", or "build me a report". For a non-technical user who knows their domain. This is the strategy skill for investigating clean data and reporting findings; if the question needs charts/dashboards instead of a written answer, use the `visualization` skill; if the tables are still raw and messy, use `data-transformation` first. +description: Answer real questions from clean, analysis-ready tables and hand back a plain-language report - an answer-finding task, not chart-building. Read the tables, turn the user's question into queries, run them on the live instance, sanity-check the numbers, write up findings the user can trust. Works over already-clean (wide, human-readable) data - survey/registration answers, event signups, customer lists, anything where the data holds the answer. 
Use when someone wants to "answer questions about my data", "report on who registered / signed up / responded", "what did people say", "analyze X", "explore this data", or "build me a report". For a non-technical user who knows their domain. Needs charts/dashboards? Use `visualization`. Tables still raw? Use `data-transformation` first. allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index bb7f91c..a3711fa 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -10,7 +10,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion Your job: take a raw source database — usually normalized, often Fivetran-synced from some SaaS tool — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world *thing* the data is about, built as Metabase **transforms** the user can inspect. -Drive everything through the `mb` CLI. First load the skills you'll need: +Drive everything through the `mb` CLI. Load the skills you'll need: ```bash mb skills get core # auth, profiles, db/table/field inspection, query @@ -18,7 +18,7 @@ mb skills get mbql # if you build transform queries in MBQL mb skills get transform # creating/running transforms, run inspection ``` -Authentication is the user's job — pick the profile per `core`'s **Auth & profiles** section and pass `--profile ` to every command. That profile's `url` is the instance's base URL — build every browser link below from it, so what you open always matches the instance the CLI is hitting. +Authentication is the user's job — pick the profile per `core`'s **Auth & profiles** section and pass `--profile ` to every command. That profile's `url` is the instance's base URL — build every browser link below from it, so what you open matches the instance the CLI is hitting. If you are making transforms, use the transform skill. 
@@ -28,7 +28,7 @@ If you are making transforms, use the transform skill. A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, whatever it is) but not databases. So: -- **No modeling jargon.** Skip the warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — and prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. +- **No modeling jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — and prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. - Don't expect the user to understand raw SQL. - Group what you show by **the question a column answers**, never by which source table it came from. - Be a **helpful assistant, not an engineer reporting status.** Elide the machinery; ask the one sharp question that matters. @@ -46,7 +46,7 @@ Sort every choice into one of these. 3. Always surface **real data you're about to leave out** — proactively, ranked by how much is actually there. 4. 
Never guess what a column or code means from its name. Confirm against the actual values. 5. Never silently drop a whole *thing*. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. -6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id is what lets two tables be combined later. You're building several tables about *related* things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible, and the user can't see it happened. (Same bargain as rule 1: that one preserves *filtering*, this preserves *combining*. Keep the ids; just don't make the user stare at them.) +6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id lets two tables be combined later. You're building several tables about *related* things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible and the user can't see it happened. Keep the ids; just don't make the user stare at them. 7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back *out*), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") 8. Never quietly carry sensitive personal data through. 
Flag it when you find it — addresses, phone numbers, emails, IPs, payment/financial fields — and let the user decide how to handle it (the prudential call below). Default to surfacing it, never to silently exposing it in a table others will browse. 9. Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. Reusing a name is only ever for updating *your own* transform (`transform update`), never for clobbering another. @@ -67,14 +67,14 @@ Phrase a prudential call as a lean plus a nod: ### Phase 0 — Get Oriented -**Get oriented first.** As soon as you know which database and schema you're in: -- **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=&schema=`. Open it in their browser if you can (e.g. the `open` / `xdg-open` command); if you can't, just paste the URL. DO NOT SKIP THIS STEP. +As soon as you know which database and schema you're in: +- **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=&schema=`. Open it in their browser if you can (e.g. `open` / `xdg-open`); else paste the URL. Don't skip this. - **Ask for a head start.** "Do you have a picture or file showing how your data fits together, like an ERD?" If yes, read it — it shortcuts the next steps. - **Ask for their conventions.** "Is there already cleaned-up data, or a past project, that shows how your team likes this done?" If yes, inspect it: it tells you their naming, their idea of "clean," and existing tables worth linking to. ### Phase 1 — Investigate (quietly) -Then dig in. Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. 
Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. +Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. 1. **Map the tables.** List them; pull each one's column names and types; note its own id. 2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `doodad_4471` is meaningless until you join the lookup and find it's *"Preferred vehicular transport"*. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. 3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. If that's the case, ask the user if they have ERD or relationship information (screenshot, JSON, documentation, etc.). For each `_id`, guess it points at ``, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to *link* to it, not duplicate it. @@ -83,7 +83,7 @@ Then dig in. Don't narrate this — a single "Let me take a look at what's in he 6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields. Profile with the cleaning checklist (end of file) in mind — surface the quality smells you hit, don't silently fix them. 7. 
**Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one *customer* across a main table + a loyalty table + custom-profile columns). Decide "one row per ___" for each and gather its attributes, decoded. Watch for a table that secretly mixes *two* things — a stable thing plus its repeating events; that's the split in the prudential calls above. -**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns by any attribute. **This is not goal-setting and you don't show it to the user or build any of it.** Its only purpose is to pressure-test your table design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or a finer grain *cheaply* preserves that flexibility, keep it. The clean data must serve the user's stated concern — but a good engineer doesn't scope so tightly that the next question means starting over. +**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns. **Don't show it to the user or build any of it.** It only pressure-tests your design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or finer grain *cheaply* preserves that flexibility, keep it. Serve the user's stated concern — but don't scope so tightly that the next question means starting over. ### Phase 2 — Present what you found (plain language) @@ -132,7 +132,7 @@ Then report plainly: > > How they connect: each **Order** belongs to a **Customer**; each **Order** lists one or more **Products**. -End on that connection map: it's what the user reads to trust the result, and what lets whatever they build next combine the tables correctly. 
+End on that connection map: it's what the user reads to trust the result, and what lets whatever they build next join the tables on the right ids instead of guessing how they relate. --- diff --git a/skill-data/mbql/SKILL.md b/skill-data/mbql/SKILL.md index 8c19ace..33fc457 100644 --- a/skill-data/mbql/SKILL.md +++ b/skill-data/mbql/SKILL.md @@ -1,6 +1,6 @@ --- name: mbql -description: Author Metabase MBQL 5 query bodies for the `mb` CLI — the only hand-authorable query format. Covers the JSON shape (lib/type mbql/query, flat stages, numeric ids), the "options object always second" clause rule, when lib/uuid is needed (it's optional — only to reference a clause), the print-schema → dry-run → run validation loop, where MBQL 5 is consumed (mb query, card dataset_query, transform source.query, measure/segment definition), the flat-vs-legacy-envelope footgun, joins and FK traversal, multi-stage pipelines, and naming aggregation output columns. Load whenever building or fixing an MBQL query by hand — "write an MBQL query", "create a card from MBQL", "the dataset_query is wrong", "fix the validation errors", "aggregate and group by", "order by the count", "join two tables", "month-over-month", or any `--dry-run` / `mb query` work. +description: Author Metabase MBQL 5 query bodies for the `mb` CLI - the only hand-authorable query format. Covers the JSON shape (lib/type mbql/query, flat numeric-id stages), the options-object-always-second clause rule, when lib/uuid is needed (optional - only to reference a clause), the print-schema/dry-run/run loop, where MBQL 5 is consumed (mb query, card dataset_query, transform source.query, measure/segment definition), the flat-vs-legacy-envelope footgun, joins and FK traversal, multi-stage pipelines, naming aggregation columns. 
Load when building or fixing an MBQL query by hand - "write an MBQL query", "create a card from MBQL", "the dataset_query is wrong", "fix the validation errors", "aggregate and group by", "join two tables", "month-over-month", or any `--dry-run` / `mb query` work. allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- @@ -8,13 +8,13 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion MBQL 5 is the **only query format you can author by hand** with confidence — it has a bundled JSON Schema, so the CLI pre-flight-validates it before sending. Legacy MBQL 4 and native SQL are accepted but **not** schema-validated (see "Other formats" below). -Prefer MBQL over native SQL: it's portable across warehouse engines and the CLI pre-flight-validates it. Try it first, but don't force it — fall back to native SQL when MBQL can't express what you need, or when an MBQL body keeps failing server-side and you can't resolve it. +Prefer MBQL over native SQL: portable across warehouse engines and pre-flight-validated. Try it first; fall back to native SQL when MBQL can't express what you need, or when an MBQL body keeps failing server-side and you can't resolve it. -The general flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). +General flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). ## The shape -A query is a flat object — `lib/type`, a numeric `database` id, and an ordered `stages` array. No recursive `source-query` nesting; multi-step queries are sibling stages. +A flat object — `lib/type`, a numeric `database` id, and an ordered `stages` array. No recursive `source-query` nesting; multi-step queries are sibling stages. 
```json { @@ -31,7 +31,7 @@ A query is a flat object — `lib/type`, a numeric `database` id, and an ordered } ``` -- **Numeric ids only.** `database`, `source-table`, and field ids are integers from `mb database list` / `mb table get --include fields`. (The portable YAML representation under git-sync uses _names_ like `[Sample Database, PUBLIC, ORDERS]`; the CLI's `/api/dataset` form uses numeric ids — don't mix them.) +- **Numeric ids only.** `database`, `source-table`, and field ids are integers from `mb database list` / `mb table get --include fields`. (Git-sync YAML uses _names_ like `[Sample Database, PUBLIC, ORDERS]`; the `/api/dataset` form uses numeric ids — don't mix them.) - **First stage** carries `source-table` (a table id) or `source-card` (a saved card). Later stages omit both and read the previous stage's output columns by name. - `source-card` references a saved card by its **numeric id** (from `mb card list`), not its string entity id; downstream fields are referenced by column name (string), not a field id. @@ -53,9 +53,9 @@ The same `[op, {options}, …]` rule holds for `aggregation`, `breakout` (a list ## UUIDs: optional — mint only to reference a clause -`lib/uuid` is **optional — leave it out whenever you can.** Omit it and the server generates a unique one for every clause as the query comes in; an empty options object `{}` is the normal, preferred case. Don't add a UUID per clause: it's needless work, and the more UUIDs you hand-manage the easier it is to trip the server's "all `lib/uuid`s must be unique" check — a duplicated UUID passes pre-flight, then fails server-side. +`lib/uuid` is **optional — leave it out whenever you can.** Omit it and the server generates a unique one for every clause; an empty options object `{}` is the normal case. The more UUIDs you hand-manage the easier it is to trip the server's "all `lib/uuid`s must be unique" check — a duplicated UUID passes pre-flight, then fails server-side. 
-Set an explicit `lib/uuid` only when you must **reference a clause from elsewhere in the query** — the one thing the server can't do for you, since you have to know the value to point at. The case that needs it: **ordering by (or otherwise reusing) an aggregation.** `["aggregation", {…}, ""]`'s third arg is the **string** `lib/uuid` of the target aggregation, so give that aggregation an explicit `lib/uuid` and point the ref at the same string. A numeric position fails with `must be the target aggregation's lib/uuid (string), not a numeric position`. +Set an explicit `lib/uuid` only when you must **reference a clause from elsewhere in the query** — you have to know the value to point at. The case that needs it: **ordering by (or otherwise reusing) an aggregation.** `["aggregation", {…}, ""]`'s third arg is the **string** `lib/uuid` of the target aggregation, so give that aggregation an explicit `lib/uuid` and point the ref at the same string. A numeric position fails with `must be the target aggregation's lib/uuid (string), not a numeric position`. ```json "aggregation": [["count", { "lib/uuid": "AGG_UUID" }]], @@ -64,7 +64,7 @@ Set an explicit `lib/uuid` only when you must **reference a clause from elsewher (`AGG_UUID` is both the aggregation's own `lib/uuid` and the string the ref points at — one value, by string equality. Every other clause omits its UUID. Expression refs work the same way but key off the expression's `lib/expression-name` string, so expressions rarely need an explicit `lib/uuid`.) -On the rare occasion you do need one, **always mint it with `mb uuid` — never write, guess, or copy a UUID yourself.** A hand-authored value is either rejected pre-flight as not-a-v4 (`"a1"`, `"uuid-1"`, `"agg-uuid-001"` → `must be a UUID v4 (RFC 4122) — run \`mb uuid\``) or, if it happens to look valid, risks colliding with another clause. 
Only `mb uuid`gives you genuine, unique v4s — mint just the few you reference (this also covers native template-tag ids and any other`format: "uuid"` slot): +When you do need one, **always mint it with `mb uuid` — never write, guess, or copy a UUID yourself.** A hand-authored value is either rejected pre-flight as not-a-v4 (`"a1"`, `"uuid-1"`, `"agg-uuid-001"` → `must be a UUID v4 (RFC 4122) — run \`mb uuid\``) or, if it looks valid, risks colliding with another clause. Only `mb uuid` gives genuine, unique v4s — mint just the few you reference (also covers native template-tag ids and any other `format: "uuid"` slot): ```bash mb uuid --count 2 --json # mint only the clauses you actually reference @@ -86,7 +86,7 @@ mb query --file q.json --profile --json # 3. validate + `path` is a JSON Pointer into the body (`/stages/0/aggregation/0`); `message` is the validator error. Exit codes: `0` valid + ran, `2` validation failed / malformed body, `1` server-side error after a valid pre-flight. -**Pre-flight is a lightweight shape check, not the full backend validator.** It checks JSON shape, `lib/uuid` format, and enum values — not operator names, the first-stage source rule, or whether a reference resolves. A clean `--dry-run` is necessary but not sufficient: a body can pass pre-flight and still fail on the server (exit `1`). The Metabase server is the authority — when a run fails, read its error and fix the body. The common ones and what they mean: +**Pre-flight is a lightweight shape check, not the full backend validator.** It checks JSON shape, `lib/uuid` format, and enum values — not operator names, the first-stage source rule, or whether a reference resolves. A clean `--dry-run` is necessary but not sufficient: a body can pass pre-flight and still fail on the server (exit `1`). The server is the authority — when a run fails, read its error and fix the body. Common ones: - `not a known MBQL clause` → a misspelled or unsupported **operator**. 
Check the vocabulary in `operators.md` (`mb skills get mbql --full`). - `Initial MBQL stage must have either :source-table or :source-card` → the **first stage** is missing its source (a numeric table or card id); only the first stage takes one, later stages read the previous stage's columns. @@ -95,11 +95,11 @@ mb query --file q.json --profile --json # 3. validate + A successful run emits the compact envelope by default: `data.rows` + slim `data.cols` (`name`, `display_name`, `base_type`, `semantic_type`). Pass `--full` for the raw `/api/dataset` envelope (`results_metadata`, `native_form`, per-column fingerprints/`field_ref`) only when you need that metadata; `--fields data.rows` narrows to rows alone. `mb query` also runs a **native** body — `{database, type:"native", native:{query:"SELECT …"}}` — which skips pre-flight; the quickest way to eyeball warehouse data. -`--skip-validate` bypasses the pre-flight and sends as-is — use only when the bundled schema disagrees with what the server actually accepts (drift / false negative). Mutually exclusive with `--dry-run`. The same flag exists on `card create/update` and `transform create/update`. +`--skip-validate` bypasses pre-flight and sends as-is — use only when the bundled schema disagrees with what the server actually accepts (drift / false negative). Mutually exclusive with `--dry-run`. Same flag exists on `card create/update` and `transform create/update`. ## Where MBQL 5 is consumed -The same body and the same pre-flight apply everywhere a query is embedded. Each pre-flights only when the value is MBQL 5 (`lib/type: "mbql/query"`); legacy shapes skip it; `--skip-validate` bypasses. +The same body and pre-flight apply everywhere a query is embedded. Each pre-flights only when the value is MBQL 5 (`lib/type: "mbql/query"`); legacy shapes skip it; `--skip-validate` bypasses. 
| Command | MBQL 5 lives at | Notes | | --------------------------------------- | ---------------------------------------------- | ------------------------------------------- | @@ -122,16 +122,16 @@ The most common mistake. The legacy MBQL 4 shape `{ "type": "query", "database": } ``` -No `type:"query"` wrapper, no `query:` nesting. If you wrap MBQL 5 inside a legacy envelope the CLI rejects it pre-send with a `ConfigError` (no `--skip-validate` gets it past). If it ever reached the server it would store silently and fail at run time with `Initial MBQL stage must have either :source-table or :source-card`. +No `type:"query"` wrapper, no `query:` nesting. If you wrap MBQL 5 inside a legacy envelope the CLI rejects it pre-send with a `ConfigError` (no `--skip-validate` gets it past). If it reached the server it would store silently and fail at run time with `Initial MBQL stage must have either :source-table or :source-card`. ## Other formats skip pre-flight -Anything that is not `lib/type: "mbql/query"` is sent as-is and normalized server-side: +Anything not `lib/type: "mbql/query"` is sent as-is and normalized server-side: - **Legacy MBQL 4** — `{ "type": "query", "database": N, "query": { "source-table": T, … } }` - **Native SQL** — `{ "type": "native", "database": N, "native": { "query": "SELECT …" } }` -`mb query --file probe.json` runs these directly; `--dry-run` on them returns `{ ok: true, errors: [] }`. Don't author MBQL 4 by hand — if you need a legacy or complex query, build it in the Metabase UI and pull the body with `mb card get --full --json` / `mb transform get --full --json`. +`mb query --file probe.json` runs these directly; `--dry-run` on them returns `{ ok: true, errors: [] }`. Don't author MBQL 4 by hand — build a legacy or complex query in the Metabase UI and pull the body with `mb card get --full --json` / `mb transform get --full --json`. ## Joins and FK traversal @@ -154,7 +154,7 @@ Two ways to read columns from a related table. 
"breakout": [["field", { "join-alias": "Customers" }, 1682]] ``` -The condition's left ref is a column of the stage's own source (`1711` = orders.customer_id); the right ref carries `join-alias` and points at the joined table's key (`1684` = customers.id). Every later reference to a joined column (`1682` = customers.plan) needs that same `join-alias`. Stack multiple objects in `joins` for multiple joins, each with its own `alias`. +The condition's left ref is a column of the stage's own source (`1711` = orders.customer_id); the right ref carries `join-alias` and points at the joined table's key (`1684` = customers.id). Every later reference to a joined column (`1682` = customers.plan) needs that same `join-alias`. Stack multiple objects in `joins`, each with its own `alias`. **Implicit FK join via `source-field`.** For a single-hop FK lookup, skip the join — put the FK column's id in the target field's `source-field` option and Metabase traverses the relationship: @@ -166,7 +166,7 @@ The condition's left ref is a column of the stage's own source (`1711` = orders. ## Multi-stage pipelines -Stages run in order; each reads the **previous stage's output columns** — the breakouts and aggregations it produced — referenced by **string name + `base-type`**, not a numeric field id. Only the first stage takes a `source-table`/`source-card`. The reason to add a stage is to operate on an aggregate (you can't filter or order by an aggregation within the stage that computes it): aggregate, then filter the aggregate, then order + limit. +Stages run in order; each reads the **previous stage's output columns** — the breakouts and aggregations it produced — referenced by **string name + `base-type`**, not a numeric field id. Only the first stage takes a `source-table`/`source-card`. Add a stage to operate on an aggregate (you can't filter or order by an aggregation within the stage that computes it): aggregate, then filter the aggregate, then order + limit. 
```json "stages": [ @@ -197,7 +197,7 @@ Later stages address the first stage's aggregation by the `name` you gave it (`" ## Naming aggregation output columns -Default MBQL 5 aggregations materialize as `count`, `count_where`, `avg`, `avg_2`, `sum`, … — fine for an ad-hoc run, ugly when the output is a transform target table or a card column. Set `name` (becomes the warehouse column name) and `display-name` (the UI header) in the aggregation's options: +Default MBQL 5 aggregations materialize as `count`, `count_where`, `avg`, `avg_2`, `sum`, … — fine for an ad-hoc run, ugly for a transform target table or card column. Set `name` (the warehouse column name) and `display-name` (the UI header) in the aggregation's options: ```json ["count", { "name": "shipments_shipped", "display-name": "Shipments shipped" }] @@ -205,7 +205,7 @@ Default MBQL 5 aggregations materialize as `count`, `count_where`, `avg`, `avg_2 ## Operator reference -The full operator vocabulary — filter operators (`=`, `!=`, `<`, `between`, `contains`, `is-null`, …), aggregation functions (`count`, `sum`, `avg`, `distinct`, `count-where`, `share`, …), expression operators (arithmetic, string, temporal), temporal-bucketing units, and binning strategies — lives in this skill's `references/operators.md`, in the CLI's numeric-id form. Load it on demand rather than dumping the schema: +The full operator vocabulary — filter operators (`=`, `!=`, `<`, `between`, `contains`, `is-null`, …), aggregation functions (`count`, `sum`, `avg`, `distinct`, `count-where`, `share`, …), expression operators (arithmetic, string, temporal), temporal-bucketing units, and binning strategies — lives in this skill's `references/operators.md`, in numeric-id form. 
Load it on demand rather than dumping the schema: ```bash mb skills get mbql --full # appends references/operators.md to this body @@ -217,7 +217,7 @@ mb skills path mbql # → the skill dir; then Read references/operator ## Don't - Don't mint a `lib/uuid` for every clause — they're optional; omit them and the server fills them in. Mint (with `mb uuid`) only the clause you need to reference; never invent, hard-code, or copy a UUID (duplicates are rejected server-side). -- Don't put the options object anywhere but slot 1, and don't use the legacy `["field", id, opts]` order. +- Keep the options object in slot 1 of every clause — `[op, {options}, ...args]`, id last (`["field", {}, 1779]`). The legacy `["field", id, opts]` order (id second) is rejected pre-flight. - Don't wrap an MBQL 5 body in `{type:"query", query:…}` — `dataset_query` / `source.query` / `definition` is the flat `mbql/query`. - Don't author MBQL 4 by hand — build it in the UI and pull it with `… get --full --json`. - Don't skip the `--dry-run` loop on a non-trivial query — it's free and exact. diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index 1ecafd2..9eaeb1d 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -1,31 +1,31 @@ --- name: robot-data-engineer -description: The front door for turning a database into something a non-technical person can actually use — clean tables, reusable definitions, dashboards, and answers to real questions — all through the `mb` CLI. This skill is a light router — it works out where the user is (raw data? clean tables already? ready to chart? just need a question answered?), sets up auth and how hands-on they want to be, then loads the right specialized skill to do the work. 
Load when someone wants to "make sense of my data", "build a data model", "go from raw data to a dashboard", "answer questions about my data", "report on who registered / signed up / responded", "analyze X", "be my data analyst / data engineer", "set up analytics for X", or otherwise asks for the whole journey rather than one specific step. (Working title — name TBD before merge.) +description: The front door for turning a database into something a non-technical person can use - clean tables, reusable definitions, dashboards, and answers - all through the `mb` CLI. A light router - it works out where the user is (raw data? clean tables? ready to chart? just need a question answered?), sets up auth and how hands-on they want to be, then loads the right specialized skill. Load when someone wants to "make sense of my data", "build a data model", "go from raw data to a dashboard", "answer questions about my data", "report on who registered / signed up / responded", "analyze X", "be my data analyst / data engineer", "set up analytics for X", or asks for the whole journey rather than one step. allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- # Robot Data Engineer -You're the **front door**, not the worker. Your job is to point the user at the right tool and get out of the way. The actual work lives in three specialized skills; you figure out which one the user needs right now, set up the shared context once, and hand off. Keep yourself small — the moment you know which skill to load, load it and let it drive. +You're the front door, not the worker. Point the user at the right tool and get out of the way. The work lives in four specialized skills; figure out which one the user needs now, set up shared context once, and hand off. The moment you know which skill to load, load it and let it drive. -The journey, end to end, is four stages: +The four stages: -1. **Raw data → clean tables** — the `data-transformation` skill. 
Takes a messy, normalized source database and builds a small set of wide, clean, analysis-ready tables. -2. **Clean tables → reusable definitions** — the `semantic-layer` skill. Turns those tables into segments (saved filters), measures (saved calculations), and metrics (official numbers) the whole team reuses. -3. **Tables/definitions → charts and dashboards** — the `visualization` skill. Builds the questions and dashboards people actually look at. -4. **Clean tables → answers and reports** — the `data-analysis` skill. Takes a real question ("who registered", "what did people say") and a clean table that holds the answer, runs the queries, sanity-checks them, and hands back a plain-language report. +1. **Raw data → clean tables** — `data-transformation`. Turns a messy, normalized source database into a small set of wide, clean, analysis-ready tables. +2. **Clean tables → reusable definitions** — `semantic-layer`. Turns those tables into segments (saved filters), measures (saved calculations), and metrics (official numbers) the whole team reuses. +3. **Tables/definitions → charts and dashboards** — `visualization`. Builds the questions and dashboards people look at. +4. **Clean tables → answers and reports** — `data-analysis`. Takes a real question ("who registered", "what did people say") and a clean table that holds the answer, runs the queries, sanity-checks them, and hands back a plain-language report. -Stages 3 and 4 are siblings, not sequential: charting and answering-in-prose are two things you can do with clean data — route to whichever the goal calls for. Most users don't say which stage they want — they describe a goal. Your job is to map the goal to a stage, confirm you've got it right, and route. +Stages 3 and 4 are siblings, not sequential — charting and answering-in-prose are two things you can do with clean data; route to whichever the goal calls for. Users describe a goal, not a stage. Map the goal to a stage, confirm, and route. 
--- ## Setup — do this once, up front -Before routing, settle two things so the child skills don't have to re-ask: +Settle two things before routing so the child skills don't re-ask: -1. **Auth.** Pick the profile per `core`'s **Auth & profiles** section — `mb auth list --json`; one → use it, several → ask which, none → ask the user to `mb auth login` — then carry `--profile <name>` into everything. (That's the canonical recipe; this one line is here because the router is the front door and may run before `core` is loaded.) +1. **Auth.** Pick the profile per `core`'s **Auth & profiles** section — `mb auth list --json`; one → use it, several → ask which, none → ask the user to `mb auth login` — then carry `--profile <name>` into everything. (Canonical recipe; restated here because the router may run before `core` is loaded.) -2. **How hands-on they want to be** (the autonomy slider). Ask once, plainly, and remember it for the whole session — tell the child skill which mode the user picked so they aren't asked again: +2. **How hands-on they want to be** (the autonomy slider). Ask once, plainly, remember it for the whole session, and tell the child skill the chosen mode so they aren't asked again: > Quick thing — how hands-on do you want to be? > • **Check with me on everything** — I'll run each step past you first. @@ -125,15 +125,15 @@ Load a skill with `mb skills get <name>`. Then **hand off** — the child owns i ## The whole journey -When the user wants the full arc (raw → dashboard), run the three stages in order, handing off to each child in turn. Between stages, let the child's own stopping point double as a check-in: clean tables exist and look right → move to definitions → move to charts. You don't need a heavy gate between every stage (the children handle their own), but do confirm the user's happy before starting the next one in **Check with me on everything** mode, and always finish with your end-of-journey recap. 
+For the full arc (raw → dashboard), run the stages in order, handing off to each child in turn. Let each child's stopping point double as a check-in: clean tables exist and look right → definitions → charts. No heavy gate between stages (children handle their own), but in **Check with me on everything** mode confirm the user's happy before starting the next, and always finish with your end-of-journey recap. -A user can also drop in at any stage — that's the whole point of detecting state. Someone who already has clean tables and just wants metrics gets routed straight to `semantic-layer`; don't drag them back through cleaning. +A user can drop in at any stage — that's the point of detecting state. Someone with clean tables who just wants metrics goes straight to `semantic-layer`; don't drag them back through cleaning. --- ## Don't -- **Don't do the children's work yourself.** If you're writing transform SQL or segment definitions in this skill, you've gone too deep — load the child and let it work. +- **Hand the work to the child skill — don't do it yourself.** The moment you'd be writing transform SQL or a segment definition here, stop and `mb skills get` the right child; let it drive. You route and set up context; the child does the work. - **Don't re-ask the autonomy question** once it's set; pass it down. - **Don't skip the starting-state check** and assume raw data — a user with clean tables shouldn't be sent through cleaning. - **Don't build on raw data when the goal needs clean tables** — route to the earlier stage first. 
diff --git a/skill-data/semantic-layer/SKILL.md b/skill-data/semantic-layer/SKILL.md index 6d0a6a6..730fd58 100644 --- a/skill-data/semantic-layer/SKILL.md +++ b/skill-data/semantic-layer/SKILL.md @@ -1,6 +1,6 @@ --- name: semantic-layer -description: Turn clean, analysis-ready tables into a shared vocabulary everyone reuses — Metabase segments (saved filters like "active customers"), measures (saved calculations like "net revenue"), and metrics (official numbers like "monthly recurring revenue") — so people stop reinventing the same definitions five different ways. Find the questions people keep asking, propose segments and measures in plain language (teaching the Metabase terms as you go), graft them onto what the org already tracks, and build them via `mb segment create` / `mb measure create` / `mb card create`. For a non-technical user who knows their domain. Load when someone wants to "make this reusable", "define X officially", "standardize how we calculate Y", "so everyone uses the same definition", "save this filter/calculation/metric for the team", or "create a segment / measure / metric". This is the strategy skill for deciding which reusable definitions an org needs and designing them; for the raw `mb segment` / `mb measure` command mechanics (flags, body shape), use the `core` skill instead. +description: Turn clean, analysis-ready tables into a shared vocabulary the org reuses - Metabase segments (saved filters, e.g. active customers), measures (saved calculations, e.g. net revenue), and metrics (official numbers, e.g. monthly recurring revenue) - so people stop reinventing the same definition five ways. Find the questions people keep asking, propose definitions in plain language, graft them onto what the org already tracks, and build them via `mb segment create` / `mb measure create` / `mb card create`. For a non-technical user who knows their domain. 
Load when someone wants to "make this reusable", "define X officially", "standardize how we calculate Y", or "create a segment / measure / metric". Strategy skill for designing reusable definitions; for raw `mb segment` / `mb measure` mechanics, use `core`. allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- @@ -18,9 +18,9 @@ You build three kinds of reusable thing. These are real Metabase features with r Introduce each like: _"I'll save this as a **segment** — that's Metabase's word for a reusable filter, so you can pull up active customers with one click anytime."_ After that, just say "segment". -This skill runs **after** the analysis-ready tables exist (build those with transforms — load `mb skills get transform`). Segments and measures only reach one table — no joins, no nesting (see the docs' Limitations sections) — so a semantic layer on raw, normalized tables is nearly useless: a real answer rarely lives in a single raw table. So: **wide clean tables first, segments/measures/metrics second.** +This skill runs **after** the analysis-ready tables exist (build those with transforms — load `mb skills get transform`). Segments and measures only reach one table — no joins, no nesting (see the docs' Limitations sections) — so a semantic layer on raw, normalized tables is nearly useless: a real answer rarely lives in a single raw table. **Wide clean tables first, segments/measures/metrics second.** -You drive everything through the `mb` CLI. Before you start, load the CLI skills you'll need: +You drive everything through the `mb` CLI. Load the CLI skills you'll need: ```bash mb skills get core # auth, profiles, db/table/field inspection, query, search @@ -35,7 +35,7 @@ Authentication is the user's job. Check `mb auth list --json`; if one profile ex A **non-technical user who knows their domain well.** They know the business — who an "active" customer is, what counts as "revenue" — but not databases. 
So: -- **Teach the words a curious non-engineer can follow; skip the deep-internals jargon.** Two sets are fine and worth teaching: Metabase product terms (**segment, measure, metric, collection, Library, the Filter / Summarize blocks**) and common data words a domain user can reasonably learn (**table, column, foreign key, schema, join, filter, row**) — gloss them once, then use them. What you still avoid is the **deep-internals jargon** that buys nothing for this user: grain, cardinality, normalize/denormalize, surrogate key, MBQL, `table_id`, materialize. Prefer the plain effect when it's clearer ("this number needs data from two tables" reads easier than "this needs a join across two fact tables") — but you don't have to contort around "foreign key" or "schema". +- **Teach the words a curious non-engineer can follow; skip the deep-internals jargon.** Two sets are fine and worth teaching: Metabase product terms (**segment, measure, metric, collection, Library, the Filter / Summarize blocks**) and common data words a domain user can reasonably learn (**table, column, foreign key, schema, join, filter, row**) — gloss them once, then use them. Avoid **deep-internals jargon** that buys nothing for this user: grain, cardinality, normalize/denormalize, surrogate key, MBQL, `table_id`, materialize. Prefer the plain effect when it's clearer ("this number needs data from two tables" reads easier than "this needs a join across two fact tables") — but you don't have to contort around "foreign key" or "schema". - **Talk about the question, then name the object.** Lead with what it does for them, then attach the term: _"I'll save 'big orders' as a segment so you can pull them up with one click."_ Not a bare "I'll create a segment on `table_id` 235." - **Be a helpful colleague, not an engineer reporting status.** Elide the wiring (ids, query bodies, the CLI). Ask the one question that actually matters. 
@@ -74,7 +74,7 @@ The user already picked an autonomy mode (the router's Shared Contract asks the - **Which kind of thing is it?** Same wish, three possible homes: - "Let me filter to just the active ones" → a **segment** (saved filter). - "Let me add up revenue the same way everywhere, on this table" → a **measure** on the table. - - "Revenue is an _official company number_ people pull onto dashboards" → a **metric** in a collection, ideally with a default month-by-month view. Lean: make it a metric when it's a headline figure the org reuses across many questions/dashboards; keep it a measure when it's a table-local convenience. + - "Revenue is an _official company number_ people pull onto dashboards" → a **metric** in a collection, with a default month-by-month view so it charts cleanly. Lean: make it a metric when it's a headline figure the org reuses across many questions/dashboards; keep it a measure when it's a table-local convenience. - **Where the metric lives.** Metrics sit in a collection (folder). Lean: put the org's blessed ones in the shared **Library** so they surface prominently; keep experimental ones in a working collection until trusted. - **Default time dimension for a metric.** A monthly default makes it chart nicely on a dashboard, but doesn't lock anyone out of other groupings. Lean: set a sensible default (usually month) for anything headline; leave it off for raw counts that aren't inherently time-series. - **How strict a segment is.** "Active" = last 30 vs 90 days is a real business call with no right answer from the data alone. Lean: surface the few reasonable thresholds with how many rows each catches, let the user pick. @@ -89,7 +89,7 @@ Phrase a prudential call as a lean plus a nod: ### Phase 0 — Understand what's reusable (quietly) -Don't narrate. One "Let me see what's here and how people are already slicing it" is plenty. Then dig in. Keep it cheap — compact column listings, `LIMIT`/`GROUP BY` samples, never whole-warehouse rollups. 
+Don't narrate. One "Let me see what's here and how people are already slicing it" is plenty. Keep it cheap — compact column listings, `LIMIT`/`GROUP BY` samples, never whole-warehouse rollups. 1. **Confirm the analysis-ready tables exist.** List tables; find the wide, clean ones (a transform step's output). If the user is pointing you at raw normalized tables, say so plainly and suggest building the clean table first — don't build a hobbled semantic layer on raw data. 2. **Find the questions people keep asking.** Search existing saved questions and dashboards (`mb search`, `mb card list`) for repeated filters and repeated calculations — the same "status = active" written eleven times, five hand-rolled versions of revenue. Those repeats _are_ the semantic layer waiting to be named. This is the highest-signal input; mine it before proposing anything. @@ -115,7 +115,7 @@ Show, in plain terms, the definitions worth saving — lead with what each _does > • **Monthly recurring revenue** — I'd save this as a metric with a month-by-month default, since it's a dashboard headline. Good? -Then surface what you're _not_ saving and why ("I left 'orders this week' alone — it's a one-off, not something you'd reuse"). And ask your prudential questions — one at a time, lean-plus-nod. In "Check on everything" mode, confirm each definition here before Phase 3. In "Balanced", ask only the judgment calls. In "Just go", state your picks and move on. +Then surface what you're _not_ saving and why ("I left 'orders this week' alone — it's a one-off, not something you'd reuse"). Ask your prudential questions — one at a time, lean-plus-nod. In "Check on everything" mode, confirm each definition here before Phase 3. In "Balanced", ask only the judgment calls. In "Just go", state your picks and move on. 
### Phase 2 — Iterate (cheap, nothing built yet) diff --git a/skill-data/transform/SKILL.md b/skill-data/transform/SKILL.md index a512688..dcfcd8d 100644 --- a/skill-data/transform/SKILL.md +++ b/skill-data/transform/SKILL.md @@ -8,7 +8,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion A **transform** persists the result of a query (native SQL or MBQL) to a warehouse table the user can read from cards, dashboards, and other transforms. It runs on a schedule (via `transform-job`) or on-demand (`transform run`). -This skill covers the create-and-run flow for one transform. The general flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). Deciding *which* transforms to build — modeling a whole raw database into a set of clean, analysis-ready tables — is the `data-transformation` skill (`mb skills get data-transformation`). +Flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). Deciding *which* transforms to build — modeling a whole raw database into a set of clean, analysis-ready tables — is the `data-transformation` skill (`mb skills get data-transformation`). ## Body shape @@ -17,7 +17,7 @@ A transform has two halves: - `source` — the query to run (`type: "query"`, with `query.type` of `native` or `mbql`). - `target` — the warehouse destination (`type: "table"`, with `database`, `schema`, `name`). -Native SQL is the simplest source and the easiest to author by hand (see "Create + run" below). MBQL is what the Metabase UI emits and is more verbose; pull a sample with `mb transform get --full --json` if you need its shape. +Native SQL is the simplest source and the easiest to author by hand. MBQL is what the Metabase UI emits and is more verbose; pull a sample with `mb transform get --full --json` if you need its shape. 
For an **MBQL 5** `source.query` (`lib/type: "mbql/query"`), the body shape, the "options object is always second" clause rule, UUID minting, aggregation/order-by refs, naming aggregation output columns, and the `--print-schema` → `--dry-run` validation loop are all in the `mbql` skill — **`mb skills get mbql`**. The MBQL-5 pre-flight on `transform create`/`update` is documented there too (legacy MBQL 4 and native sources skip it). For a transform target, naming your aggregation output columns matters more than usual — a bare `count` / `avg_2` becomes the warehouse column name; see the `mbql` skill's "Naming aggregation output columns". @@ -57,7 +57,7 @@ Notes: - Target `schema` is the schema the result table is written into (e.g. `public`). - `--wait` on `transform run` polls until status is `succeeded` or `failed`. Without it you only get `{message: "Transform run started", run_id, final: null}` and have to poll yourself. - `--sync` implies `--wait`, then waits until the run's output table is registered — the run registers it itself, no `db sync-schema` needed — adding `target_table_id` to the envelope. Use it when you'll build MBQL on the output (see "Inspect"). -- The `--json` envelope is shape-stable: `{message, run_id, final}` (plus `target_table_id` under `--sync` — a number, or `null` if the table didn't register before the timeout). `final` is always present — `null` when `--wait` is omitted or the run never started, otherwise a full `TransformRun` object with `status` and `message`. On a failed run (`final.status` ∈ {`failed`, `timeout`, `canceled`}) the CLI exits 1 and writes a one-line summary `transform run failed` to stderr; the failure detail lives only in `final.message` on stdout, so `jq -r '.final.message'` is where to look. +- The `--json` envelope is shape-stable: `{message, run_id, final}` (plus `target_table_id` under `--sync` — a number, or `null` if the table didn't register before the timeout). 
`final` is `null` when `--wait` is omitted or the run never started, otherwise a full `TransformRun` object with `status` and `message`. On a failed run (`final.status` ∈ {`failed`, `timeout`, `canceled`}) the CLI exits 1 and writes a one-line summary `transform run failed` to stderr; the failure detail lives only in `final.message` on stdout, so `jq -r '.final.message'` is where to look. - The heredoc with single-quoted `'EOF'` prevents shell from interpolating any `$vars` inside the SQL. - `transform create --json` returns the agent-facing compact projection: `{id, name, description, source_type, target: {type, database, schema, name}, target_db_id}`. Read `target.schema`/`target.name` directly off the create output — no follow-up `transform get` needed to verify where the transform will write. - If a transform with the same `name` already has a YAML representation on disk under the configured remote-sync repo, `create` mints a `_2` suffix on the exported filename (the new transform gets a fresh `entity_id`; the prior one isn't touched). For "iterate on the same concept" workflows, prefer `transform update ` — see "Iterating on a failing transform" below. @@ -70,14 +70,14 @@ mb transform list --profile --json mb transform get --profile --full --json # full transform incl. last run summary ``` -After a run the table physically exists in the warehouse, but Metabase addresses tables/columns by numeric id, so **MBQL and the UI can't reference a brand-new table until the instance syncs** (native SQL — a native `card` or `mb query` against `.` — reads it immediately). Run and register in one step with `--sync`: +After a run the table physically exists in the warehouse, but Metabase addresses tables/columns by numeric id, so **MBQL and the UI can't reference a brand-new table until the instance syncs** (native SQL — a native `card` or `mb query` against `.` — reads it immediately). Run and register in one step with `--sync`. 
```bash TABLE_ID=$(mb transform run <id> --sync --profile <name> --json | jq -r '.target_table_id') mb table get "$TABLE_ID" --include fields --profile <name> --json # field ids for MBQL ``` -`--sync` runs the transform and polls until its output table is registered, returning the id as `target_table_id` — the run registers the table itself, so no `db sync-schema` is needed. On `target_table_id: null` (still syncing when the poll timed out; exit 0) re-poll `mb transform get <id> --full --json` until the `target_table_id` / `table` linkage lands. +On `target_table_id: null` (still syncing when the poll timed out; exit 0) re-poll `mb transform get <id> --full --json` until the `target_table_id` / `table` linkage lands. Columns and types are inferred from the result set; change the SELECT shape and the next run fails on a column mismatch — drop the table first (`transform delete-table <id>`). A changed shape also needs a re-run with `--sync` before MBQL sees the new/renamed columns. @@ -104,14 +104,14 @@ Notes: ## Update body: send only writable keys, never round-trip the GET body -`transform update <id>` is **PATCH semantics** — only send the fields you actually want to change. The endpoint accepts exactly these writable keys: +`transform update <id>` is **PATCH semantics** — only send the fields you want to change. The endpoint accepts exactly these writable keys: ``` name, description, source, target, run_trigger, tag_ids, collection_id, owner_user_id, owner_email ``` -**Don't paste the output of `transform get` into a `transform update` body.** The GET response carries server-side fields (`id`, `entity_id`, `created_at`, `updated_at`, `creator_id`, `last_run`, `target_db_id`, `target_table_id`, `source_type`, `source_database_id`, `source_readable`, `creator`, `owner`, `table`, …) that the PUT endpoint isn't built to handle. 
Currently, unknown top-level keys flow into `t2/update!` and produce a leaked H2 SQL error like: +**Don't paste the output of `transform get` into a `transform update` body.** The GET response carries server-side fields (`id`, `entity_id`, `created_at`, `updated_at`, `creator_id`, `last_run`, `target_db_id`, `target_table_id`, `source_type`, `source_database_id`, `source_readable`, `creator`, `owner`, `table`, …) that the PUT endpoint isn't built to handle. Unknown top-level keys flow into `t2/update!` and produce a leaked H2 SQL error like: ``` Column "TAGS" not found; SQL statement: @@ -153,7 +153,7 @@ mb transform get --full --profile --json \ ## Iterating on a failing transform -When `transform run` fails and you want to retry with a fixed body, **prefer `transform update --file body.json` over `transform delete ` + `transform create`.** Update keeps the same row, the same `entity_id`, the same materialized table, and the same on-disk YAML filename. Concretely this means: +When `transform run` fails and you want to retry with a fixed body, **prefer `transform update --file body.json` over `transform delete ` + `transform create`.** Update keeps the same row, the same `entity_id`, the same materialized table, and the same on-disk YAML filename: - `git-sync export` produces **one** clean commit containing only the fix, instead of "broken transform" + "remove broken transform" landing as two commits in `git log`. - You don't have to chase `_2` suffixes minted when two YAMLs share a `name` on disk (see the `transform create` notes above). diff --git a/skill-data/visualization/SKILL.md b/skill-data/visualization/SKILL.md index 328d671..6234e7f 100644 --- a/skill-data/visualization/SKILL.md +++ b/skill-data/visualization/SKILL.md @@ -15,7 +15,7 @@ A card has two presentation fields alongside its `dataset_query`: Nothing validates `visualization_settings` — there is no pre-flight to fail past. 
A `display` typo or a misnamed key is accepted by the API; the card just renders as a default table or drops the setting. So **the feedback loop is read-back, not pre-flight**: after `card create`/`update`, confirm with `mb card get --full --json` (or open the card) that it rendered as intended. -General flag conventions and body-input precedence live in the `core` skill (`mb skills get core`); the `dataset_query` itself is the `mbql` skill's job (`mb skills get mbql`). This skill is only about how the result is displayed. +Flag conventions and body-input precedence live in the `core` skill (`mb skills get core`); the `dataset_query` itself is the `mbql` skill's job (`mb skills get mbql`). This skill is only about how the result is displayed. Two steps: **(1) pick the `display` that fits the data**, then **(2) bind the data columns and set options**. @@ -66,7 +66,7 @@ Closed `display` enum (card-level, non-hidden): `table`, `bar`, `line`, `area`, `graph.dimensions`, `graph.metrics`, `pie.dimension`, `pie.metric`, `scalar.field`, `funnel.metric`, `map.latitude_column`, `sankey.source`, … all take **output column-name strings** — the names the query _produces_, not field ids. A `count` aggregation outputs the column `count`; a breakout on a field outputs that field's name; a named aggregation outputs its `name`. These strings are **identical in the API form and the portable (git-sync) form** — no numeric-vs-name footgun here. -So the names you put in `visualization_settings` come from the query's output, not from `mb field`/`mb table`. If you set `name` on an aggregation (see the `mbql` skill), use that same string here. +The names come from the query's output, not from `mb field`/`mb table`. If you set `name` on an aggregation (see the `mbql` skill), use that same string here. 
## Minimum-viable settings per chart family (API form) @@ -138,7 +138,7 @@ For anything beyond a single dimension + metric — combo charts, conditional fo mb card get --full --json | jq '.visualization_settings' ``` -Paste that block into your `card create`/`update` body. The server produced it, so it's valid for that `display`. This beats guessing keys from memory, and it's token-cheap. +Paste that block into your `card create`/`update` body. The server produced it, so it's valid for that `display`. ## Full per-visualization key catalog From 98831440c74c6670f00cb2becda9e58da5cc2a7f Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 12:48:13 -0600 Subject: [PATCH 13/31] Dedup question-context rule in router Shared Contract Timothy's "Robot Data Analysts should give more context" (964b272) added a "Questions must carry their own context" paragraph that overlapped a bullet I'd added in the same Shared Contract. Keep his fuller version (it carries the recap template) as canonical, drop the redundant bullet, and point to it from the "Talking to the user" list so the rule lives in one place. --- skill-data/robot-data-engineer/SKILL.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index 9eaeb1d..2622d2d 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -60,11 +60,10 @@ This is the single source for the rules every child skill follows. Children carr **Scratch files.** Working files — transform/query/patch JSON bodies, notes — go in `./.scratch` in the current working directory, **never `/tmp`**. Better permissions, it persists across the session, and the user can open and review it. `mkdir -p ./.scratch` if it isn't there yet. 
-**Talking to the user.** Four habits, because the last few demo runs slipped on them: +**Talking to the user.** Habits the last few demo runs slipped on (see also "Questions must carry their own context" below): - **Don't reference things they never saw.** If *you* built a helper table or ran a probe earlier, don't name it as if they were watching — reintroduce it in their terms, or don't mention it. - **Assume they read only the last ~30 lines.** Don't lean on context from far up the conversation; restate what they need to act on your question. -- **Questions carry their own context.** Ask the whole question, not shorthand — "Which status counts as registered — confirmed only, or everyone?" not "confirmed or all?". - **Plain permission requests.** Don't paste a wall of SQL or JSON and ask "run this?". Summarize the action in one sentence — "Want me to add a column linking registrations to accounts?" — and offer to show the details if they ask. **Autonomy slider.** Ask once, up front (the router does this in Setup), then remember it for the whole session — children read the chosen mode, they don't re-ask: From 7fc2c6304090f49a69136073c816b5c15daea036 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 12:55:28 -0600 Subject: [PATCH 14/31] Add lint:skills script (skillsaw via uvx) --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 72b69d7..a65ec96 100644 --- a/package.json +++ b/package.json @@ -37,6 +37,7 @@ "typecheck": "tsc --noEmit", "lint": "oxlint", "lint:fix": "oxlint --fix", + "lint:skills": "uvx skillsaw lint skill-data/ --strict", "format": "oxfmt", "format:check": "oxfmt --check", "sync:representations": "bun run scripts/sync-representations.ts", From 6bccd2f9a36e105c08b4d5393a8d97a39a67e7c1 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 13:01:28 -0600 Subject: [PATCH 15/31] remove overfitting, eg fivetran mention --- skill-data/data-transformation/SKILL.md | 4 ++-- 
skill-data/robot-data-engineer/SKILL.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index a3711fa..2108390 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -8,7 +8,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion > **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: ask before showing PII row-by-row (names, emails, phones) — default to aggregates; when asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL; honor the autonomy mode the user picked. The jargon rules are spelled out in detail below (**Who you're talking to**). Full contract and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. -Your job: take a raw source database — usually normalized, often Fivetran-synced from some SaaS tool — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world *thing* the data is about, built as Metabase **transforms** the user can inspect. +Your job: take a raw source database — usually normalized, often synced from some SaaS tool by a connector like Fivetran, Airbyte, or Stitch — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world *thing* the data is about, built as Metabase **transforms** the user can inspect. Drive everything through the `mb` CLI. Load the skills you'll need: @@ -110,7 +110,7 @@ Cheap, because nothing's built. Adjust the set of things, what's kept, and the s Build one wide transform per agreed thing. Each table: - **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). 
The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. -- **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's delete flag — e.g. `_fivetran_deleted` — so tombstones never reach clean data). +- **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's soft-delete flag — Fivetran's `_fivetran_deleted`, Airbyte's `_ab_cdc_deleted_at`, or a plain `deleted_at`/`is_deleted` — so tombstones never reach clean data; not every source has one). - **Clean, plain column names**, consistent across tables. - **Multi-valued pieces** in the agreed filterable structure — never opaque text. - **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine *beside* the rows, never *instead of* them — a frozen total only ever answers the one question it was summed for. diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index 2622d2d..0f4c953 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -51,7 +51,7 @@ This is the single source for the rules every child skill follows. Children carr **Capability limits — know what you can't do.** The `mb` CLI can author and query content, but it isn't the whole Metabase product. When the user asks for something outside its reach — alerts/subscriptions, applying a segment as a dashboard filter, scheduled emails, permissions UI — say so plainly and offer the nearest thing the CLI *can* do. Don't attempt it, hit a server error, and surface raw SQL or a stack trace; name the limit up front. 
-**Permission denied — stop, diagnose, offer a way back.** When a query fails with "permission denied", the one thing you must never do is quietly run a *different* readable table and present its numbers as the answer (that's how a question about the Account table gets silently answered with Salesforce data). Instead, in order: +**Permission denied — stop, diagnose, offer a way back.** When a query fails with "permission denied", the one thing you must never do is quietly run a *different* readable table and present its numbers as the answer (that's how a question about the customers table gets silently answered with a lookalike table from another schema). Instead, in order: 1. **Stop.** Don't substitute another table and pass it off as the answer. 2. **Surface and diagnose in plain, friendly terms.** Name what was denied and the likely reason. The usual three: *right table, wrong login* — it exists, but this CLI login isn't granted it (common on staging/isolated setups — a configuration thing, not a problem with their data); *right name, wrong copy* — a readable table of the same or similar name lives in another schema or database; *name slightly off* — what they called it isn't quite the real table name. For example: "I can't read `analytics.account` — this login doesn't have access to it. That's usually a staging-permissions thing, not a problem with your data." @@ -60,7 +60,7 @@ This is the single source for the rules every child skill follows. Children carr **Scratch files.** Working files — transform/query/patch JSON bodies, notes — go in `./.scratch` in the current working directory, **never `/tmp`**. Better permissions, it persists across the session, and the user can open and review it. `mkdir -p ./.scratch` if it isn't there yet. 
-**Talking to the user.** Habits the last few demo runs slipped on (see also "Questions must carry their own context" below): +**Talking to the user.** Habits that are easy to slip on (see also "Questions must carry their own context" below): - **Don't reference things they never saw.** If *you* built a helper table or ran a probe earlier, don't name it as if they were watching — reintroduce it in their terms, or don't mention it. - **Assume they read only the last ~30 lines.** Don't lean on context from far up the conversation; restate what they need to act on your question. From 2a56836e483db8b8a1f92a4b09d6c760044fbfb8 Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 13:16:04 -0600 Subject: [PATCH 16/31] Plan mode in data-transformation --- skill-data/data-transformation/SKILL.md | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 2108390..45a2153 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -1,7 +1,7 @@ --- name: data-transformation description: Turn a raw, normalized source database into a small set of clean, analysis-ready tables. Claude investigates the source, works out the real-world "things" the data is about (even when each one is scattered across several tables), decodes coded/JSON/translated values into readable text, and builds one wide, denormalized table per thing as Metabase transforms. Designed for a non-technical user who knows their domain. Use whenever someone wants to "clean up", "flatten", "denormalize", "make sense of", or "build analysis-ready tables from" a raw database. This is the strategy skill for modeling a whole database into a set of clean tables; for authoring or running one individual transform (body shape, flags, run inspection), use the `transform` skill instead. 
-allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion, EnterPlanMode, ExitPlanMode --- # Data Transformation @@ -72,9 +72,19 @@ As soon as you know which database and schema you're in: - **Ask for a head start.** "Do you have a picture or file showing how your data fits together, like an ERD?" If yes, read it — it shortcuts the next steps. - **Ask for their conventions.** "Is there already cleaned-up data, or a past project, that shows how your team likes this done?" If yes, inspect it: it tells you their naming, their idea of "clean," and existing tables worth linking to. -### Phase 1 — Investigate (quietly) +### Phase 1 — Investigate (in plan mode, if they choose) -Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. +Orientation done, you're about to go heads-down. First, offer two ways to work: + +> Two ways I can take it from here: +> - **I dig through it all and bring you a complete plan** to approve before I build anything — quieter; you won't hear much until it's ready. +> - **We work it out together** — I share what I find and we make the calls as we go. + +First path: **enter plan mode** (`EnterPlanMode`). Everything up to the agreed table list — investigate, present, prudential calls, naming (Phases 1–3) — happens inside it, read-only; you exit once, at the approval gate before building (Phase 4). Second path: skip it, shape it conversationally through the same phases. Either way, don't build until the design is settled and user-approved. + +Plan mode is a long quiet stretch — they said "go" and walked off. 
So whenever you surface — a question now, the plan at the end — **carry your own context**: recap what it rests on right before you ask, never a back-reference to something said while they were away (the router's contract spells this out). + +Then dig in. Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. 1. **Map the tables.** List them; pull each one's column names and types; note its own id. 2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `doodad_4471` is meaningless until you join the lookup and find it's *"Preferred vehicular transport"*. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. 3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. If that's the case, ask the user if they have ERD or relationship information (screenshot, JSON, documentation, etc.). For each `_id`, guess it points at ``, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to *link* to it, not duplicate it. @@ -104,11 +114,11 @@ If you spotted existing clean data to link to (step 3), raise it here too — an ### Phase 3 — Iterate -Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. 
**Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. +Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. **Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. **In plan mode, that recap *is* your exit:** present it as the plan and call `ExitPlanMode` — approval here is the single go-ahead to build. (Iterating together? The recap is just your check before building.) ### Phase 4 — Build, check, hand back -Build one wide transform per agreed thing. Each table: +Design settled — now you build, the first step that writes; plan mode, if you used it, is behind you. Build one wide transform per agreed thing. Each table: - **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. 
- **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's soft-delete flag — Fivetran's `_fivetran_deleted`, Airbyte's `_ab_cdc_deleted_at`, or a plain `deleted_at`/`is_deleted` — so tombstones never reach clean data; not every source has one). - **Clean, plain column names**, consistent across tables. From 5f2a3919ad016dbc8097223a769cc8c41ec23de0 Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 13:53:04 -0600 Subject: [PATCH 17/31] m. --- skill-data/data-transformation/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 45a2153..12c151c 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -29,7 +29,7 @@ If you are making transforms, use the transform skill. A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, whatever it is) but not databases. So: - **No modeling jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. -- Don't expect the user to understand raw SQL. +- **Don't lean on raw SQL to communicate.** They may follow a simple `SELECT`, but don't explain your work in SQL or ask them to read or write it. 
- Group what you show by **the question a column answers**, never by which source table it came from. - Be a **helpful assistant, not an engineer reporting status.** Elide the machinery; ask the one sharp question that matters. - Your user probably says "go" and comes back later. **If you ever ask the user a question, wait for their answer.** @@ -44,7 +44,7 @@ Sort every choice into one of these. 1. Never flatten a multi-valued field into one opaque blob (e.g. three options jammed into `"email | phone | text"`). It destroys filterability, which is the whole point. 2. Never use jargon with the user. 3. Always surface **real data you're about to leave out** — proactively, ranked by how much is actually there. -4. Never guess what a column or code means from its name. Confirm against the actual values. +4. Never guess what a column or code means from its name alone. Confirm against the actual values, then interpret them in context — the table the field belongs to and the business domain it sits in (e.g., a status on orders ≠ status on subscriptions). 5. Never silently drop a whole *thing*. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. 6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id lets two tables be combined later. You're building several tables about *related* things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible and the user can't see it happened. Keep the ids; just don't make the user stare at them. 7. Never bake a non-obvious business rule into a table without confirming it in plain terms. 
When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back *out*), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") From 2af86a216cb44eea73f9cbdb14445889629d81db Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 14:12:46 -0600 Subject: [PATCH 18/31] More review feedback. --- skill-data/data-analysis/SKILL.md | 2 +- skill-data/data-transformation/SKILL.md | 65 +++++++++++++++++-------- skill-data/mbql/SKILL.md | 2 +- skill-data/robot-data-engineer/SKILL.md | 30 ++++++------ skill-data/transform/SKILL.md | 2 +- 5 files changed, 63 insertions(+), 38 deletions(-) diff --git a/skill-data/data-analysis/SKILL.md b/skill-data/data-analysis/SKILL.md index 7a04ede..abaffce 100644 --- a/skill-data/data-analysis/SKILL.md +++ b/skill-data/data-analysis/SKILL.md @@ -32,7 +32,7 @@ For each question the user asks: Don't over-interrogate, but settle the things that change the answer: -- **Scope.** All-time or a window? Everyone, or only confirmed/active? A "how many registered" with no status filter and a "how many *confirmed*" are different numbers — pick the one they mean, and say which you used. +- **Scope.** All-time or a window? Everyone, or only confirmed/active? A "how many registered" with no status filter and a "how many _confirmed_" are different numbers — pick the one they mean, and say which you used. - **Cut.** Do they want the headline number, or the number broken down (by role, by company, by version)? A breakdown is usually one `GROUP BY` away and far more useful. - **Form of the answer.** A number in chat? A short written digest? A saved question they can re-open and refilter? 
If they want something durable or visual, that's the `visualization` skill — hand off. diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 12c151c..f466411 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -8,7 +8,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion, EnterPlanMode, ExitPlan > **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: ask before showing PII row-by-row (names, emails, phones) — default to aggregates; when asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL; honor the autonomy mode the user picked. The jargon rules are spelled out in detail below (**Who you're talking to**). Full contract and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. -Your job: take a raw source database — usually normalized, often synced from some SaaS tool by a connector like Fivetran, Airbyte, or Stitch — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world *thing* the data is about, built as Metabase **transforms** the user can inspect. +Your job: take a raw source database — usually normalized, often synced from some SaaS tool by a connector like Fivetran, Airbyte, or Stitch — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world _thing_ the data is about, built as Metabase **transforms** the user can inspect. Drive everything through the `mb` CLI. Load the skills you'll need: @@ -28,7 +28,7 @@ If you are making transforms, use the transform skill. A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, whatever it is) but not databases. 
So: -- **No modeling jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. +- **No modeling jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. - **Don't lean on raw SQL to communicate.** They may follow a simple `SELECT`, but don't explain your work in SQL or ask them to read or write it. - Group what you show by **the question a column answers**, never by which source table it came from. - Be a **helpful assistant, not an engineer reporting status.** Elide the machinery; ask the one sharp question that matters. @@ -41,24 +41,27 @@ A **non-technical user who knows their domain well** — they understand the bus Sort every choice into one of these. **Hard rules — absolutes, never ask:** + 1. Never flatten a multi-valued field into one opaque blob (e.g. three options jammed into `"email | phone | text"`). 
It destroys filterability, which is the whole point. 2. Never use jargon with the user. 3. Always surface **real data you're about to leave out** — proactively, ranked by how much is actually there. 4. Never guess what a column or code means from its name alone. Confirm against the actual values, then interpret them in context — the table the field belongs to and the business domain it sits in (e.g., a status on orders ≠ status on subscriptions). -5. Never silently drop a whole *thing*. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. -6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id lets two tables be combined later. You're building several tables about *related* things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible and the user can't see it happened. Keep the ids; just don't make the user stare at them. -7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back *out*), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") +5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. +6. Never drop the columns that link things together. 
Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id lets two tables be combined later. You're building several tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible and the user can't see it happened. Keep the ids; just don't make the user stare at them. +7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back _out_), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") 8. Never quietly carry sensitive personal data through. Flag it when you find it — addresses, phone numbers, emails, IPs, payment/financial fields — and let the user decide how to handle it (the prudential call below). Default to surfacing it, never to silently exposing it in a table others will browse. -9. Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. Reusing a name is only ever for updating *your own* transform (`transform update`), never for clobbering another. +9. Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. 
Reusing a name is only ever for updating _your own_ transform (`transform update`), never for clobbering another. **Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: + - **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable — a structured column for predefined lists, or a simple join table, never opaque text. Structure is the user's call. Lean: whatever keeps filtering simplest, very possibly flat. - **Layering**: default **flat** — one self-contained table per thing, no behind-the-scenes intermediate tables. Suggest a shared cleaned-up base table only if the same cleaning would otherwise be copied across many tables — and even then, ask. - **Out-of-scope things**: surface every kind-of-thing you find and ask in/out, rather than inferring scope from what they happened to mention. -- **A repeating thing vs. the events it takes part in**: one table can mix a *stable* thing (a customer, a company) with the *repeating* events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. (Phase 0's one-to-one / one-to-many check already tells you which.) -- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), *how* to carry it is the user's call — keep as-is, mask (last-4, domain-only, city not street), or drop. Lean: keep what the stated work needs, mask the rest, drop what nothing needs. +- **A repeating thing vs. 
the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with the _repeating_ events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. (Phase 1's one-to-one / one-to-many check already tells you which.) +- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), _how_ to carry it is the user's call — keep as-is, mask (last-4, domain-only, city not street), or drop. Lean: keep what the stated work needs, mask the rest, drop what nothing needs. Phrase a prudential call as a lean plus a nod: + > "I'd keep these as one simple table rather than splitting into behind-the-scenes pieces — easier to look through. Good?" --- @@ -68,6 +71,7 @@ Phrase a prudential call as a lean plus a nod: ### Phase 0 — Get Oriented As soon as you know which database and schema you're in: + - **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=<database-id>&schema=<schema>`. Open it in their browser if you can (e.g. `open` / `xdg-open`); else paste the URL. Don't skip this. - **Ask for a head start.** "Do you have a picture or file showing how your data fits together, like an ERD?" If yes, read it — it shortcuts the next steps. - **Ask for their conventions.** "Is there already cleaned-up data, or a past project, that shows how your team likes this done?" If yes, inspect it: it tells you their naming, their idea of "clean," and existing tables worth linking to. @@ -77,6 +81,7 @@ As soon as you know which database and schema you're in: Orientation done, you're about to go heads-down.
First, offer two ways to work: > Two ways I can take it from here: +> > - **I dig through it all and bring you a complete plan** to approve before I build anything — quieter; you won't hear much until it's ready. > - **We work it out together** — I share what I find and we make the calls as we go. @@ -85,27 +90,31 @@ First path: **enter plan mode** (`EnterPlanMode`). Everything up to the agreed t Plan mode is a long quiet stretch — they said "go" and walked off. So whenever you surface — a question now, the plan at the end — **carry your own context**: recap what it rests on right before you ask, never a back-reference to something said while they were away (the router's contract spells this out). Then dig in. Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. + 1. **Map the tables.** List them; pull each one's column names and types; note its own id. -2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `doodad_4471` is meaningless until you join the lookup and find it's *"Preferred vehicular transport"*. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. -3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. If that's the case, ask the user if they have ERD or relationship information (screenshot, JSON, documentation, etc.). For each `<thing>_id`, guess it points at `<thing>`, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many.
**Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to *link* to it, not duplicate it. +2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `doodad_4471` is meaningless until you join the lookup and find it's _"Preferred vehicular transport"_. Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. +3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. If that's the case, ask the user if they have ERD or relationship information (screenshot, JSON, documentation, etc.). For each `<thing>_id`, guess it points at `<thing>`, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to _link_ to it, not duplicate it. 4. **Pin down "one row per what."** Count rows; check the id is unique; figure out what a single row is. **Watch for lies:** a stale count column, or a table that looks like "all of X" but is a filtered subset. 5. **Reconcile across related tables.** Do child rows all link to a parent? Orphans? Is one table a trimmed snapshot while another keeps everything? These mismatches matter and the user can't see them — you must. 6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields. Profile with the cleaning checklist (end of file) in mind — surface the quality smells you hit, don't silently fix them. -7.
**Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one *customer* across a main table + a loyalty table + custom-profile columns). Decide "one row per ___" for each and gather its attributes, decoded. Watch for a table that secretly mixes *two* things — a stable thing plus its repeating events; that's the split in the prudential calls above. +7. **Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one _customer_ across a main table + a loyalty table + custom-profile columns). Decide "one row per \_\_\_" for each and gather its attributes, decoded. Watch for a table that secretly mixes _two_ things — a stable thing plus its repeating events; that's the split in the prudential calls above. -**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns. **Don't show it to the user or build any of it.** It only pressure-tests your design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or finer grain *cheaply* preserves that flexibility, keep it. Serve the user's stated concern — but don't scope so tightly that the next question means starting over. +**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns. **Don't show it to the user or build any of it.** It only pressure-tests your design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or finer grain _cheaply_ preserves that flexibility, keep it. Serve the user's stated concern — but don't scope so tightly that the next question means starting over. 
### Phase 2 — Present what you found (plain language) Three things, in order: **(a) The things, in plain terms.** One short blurb each. E.g. in an online store: + > **Customers** — one row per customer. Who they are (name, company, location), how they've been in touch, what they've spent, whether they're active or churned. **(b) The full inventory — including what you'd leave out.** Never infer scope silently: + > I found 6 kinds of things: **Customers, Orders, Products, Suppliers, Shipments, Returns.** I'd build the first four. **Shipments** and **Returns** also have real data — want those in, or leave them? **(c) What would be set aside — proactively, ranked, two buckets:** + > Nothing important is lost. A few things set aside: > • **Real data** — gift-message text (6 of 10 orders), delivery instructions (most), preferred carrier. Minor, but real — want any kept? > • **Safe to drop** — duplicate product names in other languages, internal bookkeeping columns. No real loss. @@ -114,27 +123,41 @@ If you spotted existing clean data to link to (step 3), raise it here too — an ### Phase 3 — Iterate -Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. **Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. **In plan mode, that recap *is* your exit:** present it as the plan and call `ExitPlanMode` — approval here is the single go-ahead to build. (Iterating together? The recap is just your check before building.) +Cheap, because nothing's built. 
Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. **Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. **In plan mode, that recap _is_ your exit:** present it as the plan and call `ExitPlanMode` — approval here is the single go-ahead to build. (Iterating together? The recap is just your check before building.) ### Phase 4 — Build, check, hand back -Design settled — now you build, the first step that writes; plan mode, if you used it, is behind you. Build one wide transform per agreed thing. Each table: +Design settled — now you build, the first step that writes; plan mode, if you used it, is behind you. Build one wide transform per agreed thing — and build for how it'll be judged: aim for output that's readable on sight, not just one that runs clean. Each table: + - **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. - **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's soft-delete flag — Fivetran's `_fivetran_deleted`, Airbyte's `_ab_cdc_deleted_at`, or a plain `deleted_at`/`is_deleted` — so tombstones never reach clean data; not every source has one). - **Clean, plain column names**, consistent across tables. - **Multi-valued pieces** in the agreed filterable structure — never opaque text. 
-- **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine *beside* the rows, never *instead of* them — a frozen total only ever answers the one question it was summed for. +- **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine _beside_ the rows, never _instead of_ them — a frozen total only ever answers the one question it was summed for. Then make the links real, not just implied: + - **Wire foreign keys between your tables.** Mark each linking id as a foreign key pointing at the id it references (`mb field update` — set the column's type to foreign-key and its target). Now Metabase itself knows the tables connect and can traverse them. - **Graft onto existing clean data** the user approved (step 3 / Phase 1): point the linking id at the existing table's id the same way. Link, don't duplicate. - **Write down what you learned.** You decoded every column's real meaning while investigating — save it: set a short description on each table and its non-obvious columns (`mb table update` / `mb field update`). The cleaned data then explains itself inside Metabase — in search, in the Question editor, to Metabot — instead of the knowledge living only in this chat. -When you start refining a built transform *with* the user, open its inspector for them so you're looking at the same thing — `/data-studio/transforms/<id>/inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate. +When you start refining a built transform _with_ the user, open its inspector for them so you're looking at the same thing — `/data-studio/transforms/<id>/inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate.
+ +**Check the output before handing back — the user can't.** Two passes, in order. + +**Pass 1 — Correctness (did it run right).** After each transform runs, run quick ad-hoc tests against what Phase 0 led you to expect: row counts in the right ballpark, decoded columns readable (no stray codes), linking ids that resolve to the other tables, no column unexpectedly all-null or blown up in count. Treat surprises as bugs to chase, not noise. A table that can't combine with the others — a dropped id, or the same id named two ways — is a silent failure; catch it here. + +**Pass 2 — Fitness (is it nice to use).** Correct isn't the bar; _usable_ is. Run `SELECT * FROM <table> LIMIT 20` and read every column left to right as if you'd never seen the source: would a non-technical person find each one readable? Smells that say not-yet, even though nothing errored: -**Check the output before handing back — the user can't.** After each transform runs, look at the actual data and run quick ad-hoc tests against what Phase 0 led you to expect: row counts in the right ballpark, decoded columns actually readable (no stray codes), linking ids that resolve to the other tables, no column unexpectedly all-null or blown up in count. Treat surprises as bugs to chase, not noise. A table that can't combine with the others — usually a dropped id, or the same id named two different ways — is a silent failure; catch it here. + +- a multi-valued column still a raw JSON/array blob or `["Email","SMS"]` text — rule 1 never actually got resolved; +- decoded answers still carrying raw ids with no readable label, or one cryptic column per code; +- a code sitting beside its own label when only the label is wanted, or two columns saying the same thing; +- a "decoded" column that reads as a slug (`pref_contact_mthd`) rather than plain language. + +A readability smell is a bug: fix it (`transform update`), re-run, look again.
When the fix is really a shape choice (how a multi-select is structured) or a keep/drop call, that's the user's — surface it, don't silently decide. Then report plainly: + > Done. Three tables: > • **Customers** — transform #41 > • **Orders** — transform #42 @@ -150,9 +173,10 @@ End on that connection map: it's what the user reads to trust the result, and wh The shape recurs across SaaS exports, whatever the domain. A coded column — say `c_4471` on a responses table — means nothing alone. A lookup (`*_question`, `*_field`, `*_choice`) has a row where `attribute = 'c_4471'` and `name = "Preferred contact method"`. Single-select answers are often already `{"id":…, "value":"Email"}` — use `value`. Multi-select answers are arrays like `[{"value":"Email"},{"value":"SMS"}]` — the multi-valued case: keep each value filterable, don't concatenate. -Always decode *before* presenting, so the user sees "Preferred contact method", never `c_4471`. Three cautions: -- **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come *from* the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. -- **Codes are usually specific to today's data.** `c_4471` exists only for *this* form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. +Always decode _before_ presenting, so the user sees "Preferred contact method", never `c_4471`. Three cautions: + +- **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come _from_ the lookup's `name`, sourced in the query — not pasted as a literal. 
A hard-typed label goes wrong the moment the source changes. +- **Codes are usually specific to today's data.** `c_4471` exists only for _this_ form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. - **Normalize encodings once.** Turn raw representations clean in the table itself, so nothing downstream re-derives them: signed amounts → clear positive numbers by kind, 0/1 → true/false, timestamps → one consistent timezone, text → trimmed and case-consistent, and junk placeholders (`"NULL"`, `"N/A"`, `"-"`, empty string) → real null. --- @@ -164,6 +188,7 @@ A scan-list, not a pipeline — and the governing rule is **surface what you fin **Just apply** (safe, universal — already your default): consistent timestamps/timezone; trimmed, case-consistent text; junk placeholders (`"NULL"`, `"N/A"`, `"-"`, `""`) → real null; sane numeric precision; booleans from varied forms (Y/N, 1/0); soft-deleted rows filtered, bookkeeping columns dropped. **Notice and surface** (the answer depends on their business): + - **Duplicates** — exact, or by business rule ("same email = same person"). Never merge silently. - **Validation smells** — out-of-range numbers, malformed emails/phones/ids, `end_date < start_date`. - **Outliers** — values that read as data-entry errors. Flag, don't drop. diff --git a/skill-data/mbql/SKILL.md b/skill-data/mbql/SKILL.md index 33fc457..1818d9f 100644 --- a/skill-data/mbql/SKILL.md +++ b/skill-data/mbql/SKILL.md @@ -64,7 +64,7 @@ Set an explicit `lib/uuid` only when you must **reference a clause from elsewher (`AGG_UUID` is both the aggregation's own `lib/uuid` and the string the ref points at — one value, by string equality. 
Every other clause omits its UUID. Expression refs work the same way but key off the expression's `lib/expression-name` string, so expressions rarely need an explicit `lib/uuid`.) -When you do need one, **always mint it with `mb uuid` — never write, guess, or copy a UUID yourself.** A hand-authored value is rejected pre-flight as not-a-v4 (`"a1"`, `"uuid-1"`, `"agg-uuid-001"` → `must be a UUID v4 (RFC 4122) — run \`mb uuid\``), or if it looks valid risks colliding with another clause. Only `mb uuid` gives genuine, unique v4s — mint just the few you reference (also covers native template-tag ids and any other `format: "uuid"` slot): +When you do need one, **always mint it with `mb uuid` — never write, guess, or copy a UUID yourself.** A hand-authored value is rejected pre-flight as not-a-v4 (`"a1"`, `"uuid-1"`, `"agg-uuid-001"` → `` must be a UUID v4 (RFC 4122) — run `mb uuid` ``), or if it looks valid risks colliding with another clause. Only `mb uuid` gives genuine, unique v4s — mint just the few you reference (also covers native template-tag ids and any other `format: "uuid"` slot): ```bash mb uuid --count 2 --json # mint only the clauses you actually reference diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index 0f4c953..ecdbf2e 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -45,24 +45,24 @@ This is the single source for the rules every child skill follows. Children carr **Who you're talking to.** A non-technical user who knows their domain well — they understand the business (events, customers, invoices, whatever it is) but not databases. Talk in their terms. -**Jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, normalize, denormalize, surrogate key, materialize — and prefer plain phrasing: "one row per ___", "what it tells you", "links up with", "how full a column is".
But don't overdo it: they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key, cardinality. **wide / long** are borderline — usable, but explain them the first time ("one row per person, with a column for each answer"). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. +**Jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, normalize, denormalize, surrogate key, materialize — and prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is". But don't overdo it: they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key, cardinality. **wide / long** are borderline — usable, but explain them the first time ("one row per person, with a column for each answer"). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. **PII.** Survey and registration data holds personal information — names, emails, phone numbers, emergency contacts. Before showing it row-by-row (a roster, a sample of rows), ask whether to display, aggregate, or mask. Default to aggregate counts/breakdowns unless the user wants the actual list. -**Capability limits — know what you can't do.** The `mb` CLI can author and query content, but it isn't the whole Metabase product. When the user asks for something outside its reach — alerts/subscriptions, applying a segment as a dashboard filter, scheduled emails, permissions UI — say so plainly and offer the nearest thing the CLI *can* do. Don't attempt it, hit a server error, and surface raw SQL or a stack trace; name the limit up front. +**Capability limits — know what you can't do.** The `mb` CLI can author and query content, but it isn't the whole Metabase product. 
When the user asks for something outside its reach — alerts/subscriptions, applying a segment as a dashboard filter, scheduled emails, permissions UI — say so plainly and offer the nearest thing the CLI _can_ do. Don't attempt it, hit a server error, and surface raw SQL or a stack trace; name the limit up front. -**Permission denied — stop, diagnose, offer a way back.** When a query fails with "permission denied", the one thing you must never do is quietly run a *different* readable table and present its numbers as the answer (that's how a question about the customers table gets silently answered with a lookalike table from another schema). Instead, in order: +**Permission denied — stop, diagnose, offer a way back.** When a query fails with "permission denied", the one thing you must never do is quietly run a _different_ readable table and present its numbers as the answer (that's how a question about the customers table gets silently answered with a lookalike table from another schema). Instead, in order: 1. **Stop.** Don't substitute another table and pass it off as the answer. -2. **Surface and diagnose in plain, friendly terms.** Name what was denied and the likely reason. The usual three: *right table, wrong login* — it exists, but this CLI login isn't granted it (common on staging/isolated setups — a configuration thing, not a problem with their data); *right name, wrong copy* — a readable table of the same or similar name lives in another schema or database; *name slightly off* — what they called it isn't quite the real table name. For example: "I can't read `analytics.account` — this login doesn't have access to it. That's usually a staging-permissions thing, not a problem with your data." -3. **Offer to search — don't auto-crawl.** Ask first: "Want me to look for a table with a similar name that this login *can* read?" 
Only on yes, run `mb search ` / `mb table list`, and surface any match as a **confirm question**, never as a substituted answer: "There's `dbt_models.account` I can read — did you mean that one?" +2. **Surface and diagnose in plain, friendly terms.** Name what was denied and the likely reason. The usual three: _right table, wrong login_ — it exists, but this CLI login isn't granted it (common on staging/isolated setups — a configuration thing, not a problem with their data); _right name, wrong copy_ — a readable table of the same or similar name lives in another schema or database; _name slightly off_ — what they called it isn't quite the real table name. For example: "I can't read `analytics.account` — this login doesn't have access to it. That's usually a staging-permissions thing, not a problem with your data." +3. **Offer to search — don't auto-crawl.** Ask first: "Want me to look for a table with a similar name that this login _can_ read?" Only on yes, run `mb search ` / `mb table list`, and surface any match as a **confirm question**, never as a substituted answer: "There's `dbt_models.account` I can read — did you mean that one?" 4. **Hand control back.** Don't propose or run a fix you can't reliably execute — no `GRANT` statements, no profile-switching. The recovery is the user's call. **Scratch files.** Working files — transform/query/patch JSON bodies, notes — go in `./.scratch` in the current working directory, **never `/tmp`**. Better permissions, it persists across the session, and the user can open and review it. `mkdir -p ./.scratch` if it isn't there yet. **Talking to the user.** Habits that are easy to slip on (see also "Questions must carry their own context" below): -- **Don't reference things they never saw.** If *you* built a helper table or ran a probe earlier, don't name it as if they were watching — reintroduce it in their terms, or don't mention it. 
+- **Don't reference things they never saw.** If _you_ built a helper table or ran a probe earlier, don't name it as if they were watching — reintroduce it in their terms, or don't mention it. - **Assume they read only the last ~30 lines.** Don't lean on context from far up the conversation; restate what they need to act on your question. - **Plain permission requests.** Don't paste a wall of SQL or JSON and ask "run this?". Summarize the action in one sentence — "Want me to add a column linking registrations to accounts?" — and offer to show the details if they ask. @@ -75,7 +75,7 @@ This is the single source for the rules every child skill follows. Children carr **When genuinely unsure, ask — never assume.** -**Questions must carry their own context.** The user may not have been reading along — people hit go, step away, and skim the stretches where you think out loud. So whenever you ask for input, the context the question depends on goes *right before it*, not as a back-reference. "Given the mismatch I found earlier, what would you like to do?" forces a scroll-back; lead with a short recap instead: +**Questions must carry their own context.** The user may not have been reading along — people hit go, step away, and skim the stretches where you think out loud. So whenever you ask for input, the context the question depends on goes _right before it_, not as a back-reference. "Given the mismatch I found earlier, what would you like to do?" forces a scroll-back; lead with a short recap instead: > I have a question for you — quick recap so it makes sense: > @@ -93,14 +93,14 @@ Recap only the few points the question turns on — enough to answer cold, not a ## Work out where they are, then route -Don't make the user name a *stage* — but do find out *where their data lives* before you go looking for it. +Don't make the user name a _stage_ — but do find out _where their data lives_ before you go looking for it. 
**Ask before you crawl.** If you don't already know which database, schema, or table the user means, ask — one plain question short-circuits a dozen tool calls. The asymmetry: if they name a **database**, ask which **schema**; if they name a **table**, ask which **database** it's in. "If you don't know, no problem — I'll look" is the fallback, not the opening move. Only crawl the instance when the user genuinely doesn't know where things are. **When you do crawl — the efficient ladder** (cheap, narrowest-first; never pull whole-warehouse rollups): - Walk down: `mb db list` → `mb db schemas ` → `mb db schema-tables ` → `mb table list [--db-id]` → `mb table fields ` / `mb table metadata `. -- Have a *name* to look for rather than a tree to walk? Use `mb search [--models] [--db-id]` instead of crawling. +- Have a _name_ to look for rather than a tree to walk? Use `mb search [--models] [--db-id]` instead of crawling. - Need to know what's actually in a column? `mb field summary ` (row/distinct counts) and `mb field values ` (sample values). - **If a database looks freshly connected, or a table the user expects isn't showing up, offer to sync** — `mb db sync-schema --wait` — before concluding the table doesn't exist. 
@@ -108,13 +108,13 @@ Don't make the user name a *stage* — but do find out *where their data lives* **Map goal + state to a skill:** -| What the user wants / what's there | Load | -| -------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- | -| "Clean up / flatten / make sense of" raw, normalized data; no clean tables yet | `data-transformation` | -| Clean tables exist; "make this reusable", "define active customers / revenue / MRR officially", "so everyone uses the same definition" | `semantic-layer` | -| Tables (and maybe definitions) exist; "chart this", "build a dashboard", "show me X over time" | `visualization` | +| What the user wants / what's there | Load | +| -------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| "Clean up / flatten / make sense of" raw, normalized data; no clean tables yet | `data-transformation` | +| Clean tables exist; "make this reusable", "define active customers / revenue / MRR officially", "so everyone uses the same definition" | `semantic-layer` | +| Tables (and maybe definitions) exist; "chart this", "build a dashboard", "show me X over time" | `visualization` | | Clean tables exist; "answer this question", "who registered", "what did people say", "analyze / report on / summarize X" (wants a written answer, not a chart) | `data-analysis` | -| "Do the whole thing" / "set up analytics for X" from raw data | start at `data-transformation`, then continue down the journey (see below) | +| "Do the whole thing" / "set up analytics for X" from raw data | start at `data-transformation`, then continue down the journey (see below) | Load a skill with `mb skills get `. 
Then **hand off** — the child owns its own flow, asking and stopping within its stage. Don't narrate the child's work or duplicate its steps. diff --git a/skill-data/transform/SKILL.md b/skill-data/transform/SKILL.md index dcfcd8d..5ef21c3 100644 --- a/skill-data/transform/SKILL.md +++ b/skill-data/transform/SKILL.md @@ -8,7 +8,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion A **transform** persists the result of a query (native SQL or MBQL) to a warehouse table the user can read from cards, dashboards, and other transforms. It runs on a schedule (via `transform-job`) or on-demand (`transform run`). -Flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). Deciding *which* transforms to build — modeling a whole raw database into a set of clean, analysis-ready tables — is the `data-transformation` skill (`mb skills get data-transformation`). +Flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). Deciding _which_ transforms to build — modeling a whole raw database into a set of clean, analysis-ready tables — is the `data-transformation` skill (`mb skills get data-transformation`). ## Body shape From 2e1bc0261f1910a298979ecb2aca4b4eaac3c0e4 Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 14:54:26 -0600 Subject: [PATCH 19/31] Last piece of feedback --- skill-data/data-transformation/SKILL.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index f466411..097b11f 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -70,6 +70,8 @@ Phrase a prudential call as a lean plus a nod: ### Phase 0 — Get Oriented +**Pin down where the data lives — ask before you hunt.** A table or schema name the user mentions tells you _what_ but not _where_: an instance can hold several databases, each with several schemas. 
Rather than listing them all to find it, just ask — "Which database is this in, and the schema if you know it? No worries if you're not sure, I can find it." A confident answer short-circuits a lot of blind searching; "not sure" costs nothing and you fall back to locating it yourself. If you've genuinely looked and still can't find a table the user is sure is there, don't keep digging. One possible reason is that Metabase hasn't picked up that database's latest schema yet — gently raise it and ask whether the data's been synced recently, and let the user run the sync from Metabase if it's needed. + As soon as you know which database and schema you're in: - **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `/data-studio/schema-viewer?database-id=&schema=`. Open it in their browser if you can (e.g. `open` / `xdg-open`); else paste the URL. Don't skip this. From 15edc3a593f3c6bb03f1ad5614c38fc7f1e5b204 Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 14:55:27 -0600 Subject: [PATCH 20/31] Manual shrinking of data-transformation skill --- skill-data/data-transformation/SKILL.md | 46 ++++++++++++------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 097b11f..7630137 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -18,7 +18,7 @@ mb skills get mbql # if you build transform queries in MBQL mb skills get transform # creating/running transforms, run inspection ``` -Authentication is the user's job — pick the profile per `core`'s **Auth & profiles** section and pass `--profile ` to every command. That profile's `url` is the instance's base URL — build every browser link below from it, so what you open matches the instance the CLI is hitting. +Users authenticate. 
You pick the profile per `core`'s **Auth & profiles** and pass `--profile ` to every command. That profile's `url` is the instance's base URL. Browser links below are built from it, ensuring the links are consistent with your CLI usage. If you are making transforms, use the transform skill. @@ -26,13 +26,13 @@ If you are making transforms, use the transform skill. ## Who you're talking to -A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, whatever it is) but not databases. So: +A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, etc.) but not databases. -- **No modeling jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. -- **Don't lean on raw SQL to communicate.** They may follow a simple `SELECT`, but don't explain your work in SQL or ask them to read or write it. +- **No modeling jargon.** Skip warehouse vocabulary — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). 
**Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're not database jargon. +- **Don't lean on raw SQL to communicate.** They may follow a simple `SELECT`, but don't explain work via SQL or ask them to read/write it. - Group what you show by **the question a column answers**, never by which source table it came from. -- Be a **helpful assistant, not an engineer reporting status.** Elide the machinery; ask the one sharp question that matters. -- Your user probably says "go" and comes back later. **If you ever ask the user a question, wait for their answer.** +- Be a **helpful assistant, not an engineer reporting status.** Elide machinery; ask sharp questions that matter. +- Your user may say "go" and come back later. **If you ever ask the user a question, wait for their answer.** --- @@ -42,23 +42,23 @@ Sort every choice into one of these. **Hard rules — absolutes, never ask:** -1. Never flatten a multi-valued field into one opaque blob (e.g. three options jammed into `"email | phone | text"`). It destroys filterability, which is the whole point. -2. Never use jargon with the user. -3. Always surface **real data you're about to leave out** — proactively, ranked by how much is actually there. -4. Never guess what a column or code means from its name alone. Confirm against the actual values, then interpret them in context — the table the field belongs to and the business domain it sits in (e.g., a status on orders ≠ status on subscriptions). +1. Never flatten multi-valued fields into opaque blobs (e.g. three options squished: `"email | phone | text"`). It destroys filterability (the whole point). +2. Never use jargon with the user. Explain by domain and telos. +3. Always surface **real data you're about to leave out** proactively, ranked by how much is extant. +4. Never guess what a column or code means from its name alone. 
Confirm against actual values, interpret them in context: the table the field belongs to and the relevant domain (e.g., a status on orders ≠ status on subscriptions). 5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. -6. Never drop the columns that link things together. Every table keeps its own id **and** the ids tying it to your other tables — alongside the readable labels you copy in, not instead of them. The label is for reading; the id lets two tables be combined later. You're building several tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — a dropped id makes that quietly impossible and the user can't see it happened. Keep the ids; just don't make the user stare at them. -7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets (a refund is money back _out_), which row is someone's "current" one, what "active" means — say it back in one plain sentence and get a yes first. You know the columns; only they know the business, and a wrong rule hides perfectly inside a clean-looking table. ("I'm treating each person's most recent sign-up as their current one — right?") -8. Never quietly carry sensitive personal data through. Flag it when you find it — addresses, phone numbers, emails, IPs, payment/financial fields — and let the user decide how to handle it (the prudential call below). Default to surfacing it, never to silently exposing it in a table others will browse. -9. Never overwrite an existing table or another transform's output. Before building, check the target name is actually free (`mb transform list`, `mb table list`); if something already writes there, stop and surface it — building over it silently destroys their data. 
Reusing a name is only ever for updating _your own_ transform (`transform update`), never for clobbering another. +6. Never drop columns that link things together. Every table keeps its own id **and** the ids tying it to other tables — alongside the readable labels you copy in, not instead of. The label is for reading; the id is for joining. You're building tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — dropped ids make that quietly impossible and the user will regret it. Keep the ids; don't force the user to stare at them. +7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets, which row is the "current" one, what "active" means — say it back in one plain sentence and get a yes/no first. You know only the columns; they know the business. Wrong rules hide insidiously in clean-looking tables. ("I'm treating each person's most recent sign-up as their current one — right?") +8. Never sneak sensitive personal data through. Flag it when appropriate — addresses, phone numbers, emails, IPs, financial, etc. — and ask the user how to handle it (the prudential call below). Always surface, never silently expose it in a table others will browse. +9. Never overwrite existing tables or other transforms' outputs. Before building, check the target name is unused (`mb transform list`, `mb table list`); if it's in use, stop and surface it — building over it silently destroys their data. Reuse names only for updating _your own_ transform (`transform update`), never for clobbering another. **Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. 
State a lean, then let the user decide.** The recurring ones: -- **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable — a structured column for predefined lists, or a simple join table, never opaque text. Structure is the user's call. Lean: whatever keeps filtering simplest, very possibly flat. -- **Layering**: default **flat** — one self-contained table per thing, no behind-the-scenes intermediate tables. Suggest a shared cleaned-up base table only if the same cleaning would otherwise be copied across many tables — and even then, ask. -- **Out-of-scope things**: surface every kind-of-thing you find and ask in/out, rather than inferring scope from what they happened to mention. -- **A repeating thing vs. the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with the _repeating_ events it's in (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider giving it its own one-row-per-thing table too, linked by id, so "how many distinct customers" and the per-customer details have a clean home. Lean: split when recurrence is real, keep as one table when each appears once. (Phase 0's one-to-one / one-to-many check already tells you which.) -- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), _how_ to carry it is the user's call — keep as-is, mask (last-4, domain-only, city not street), or drop. Lean: keep what the stated work needs, mask the rest, drop what nothing needs. +- **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable! Structured columns for predefined lists, or simple join tables, never opaque text. Structure is the user's call. Lean: easiest filtering, probably flat. +- **Layering**: default **flat** — one self-contained table per thing, no hidden intermediate tables. 
Suggest a shared cleaned-up base table only for DRY, avoiding copying complex logic across many transforms. Even then, ask. +- **Out-of-scope things**: surface every domain-model you find and ask in/out, rather than inferring scope from what they happened to mention. +- **A repeating thing vs. the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with _repeating_ events (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider a one-row-per-thing table too, linked by id, so "how many distinct X" and the per-X details have clean homes. Lean: split when recurrence is real, but one table when each appears once. (Phase 0's one-to-one / one-to-many check already tells you which.) +- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), _how_ to carry it is the user's choice — keep as-is, mask (partial redaction), or drop. Lean: keep what is needed, mask the rest, drop the useless. Phrase a prudential call as a lean plus a nod: @@ -131,10 +131,10 @@ Cheap, because nothing's built. Adjust the set of things, what's kept, and the s Design settled — now you build, the first step that writes; plan mode, if you used it, is behind you. Build one wide transform per agreed thing — and build for how it'll be judged: aim for output that's readable on sight, not just one that runs clean. Each table: -- **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too). The label is for reading; the id keeps the tables combinable. Use the same id name everywhere a thing appears. 
+- **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too, per rule 6). Use the same id name everywhere a thing appears. - **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's soft-delete flag — Fivetran's `_fivetran_deleted`, Airbyte's `_ab_cdc_deleted_at`, or a plain `deleted_at`/`is_deleted` — so tombstones never reach clean data; not every source has one). - **Clean, plain column names**, consistent across tables. -- **Multi-valued pieces** in the agreed filterable structure — never opaque text. +- **Multi-valued pieces** in the agreed filterable structure (rule 1). - **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine _beside_ the rows, never _instead of_ them — a frozen total only ever answers the one question it was summed for. Then make the links real, not just implied: @@ -187,7 +187,7 @@ Always decode _before_ presenting, so the user sees "Preferred contact method", A scan-list, not a pipeline — and the governing rule is **surface what you find, don't silently "fix" it.** Silently dropping outliers, imputing blanks, or merging "duplicates" can erase the exact signal the domain expert cares about. Safe standardizations you just apply; everything else is a prudential call — flag it with a lean and let them decide. -**Just apply** (safe, universal — already your default): consistent timestamps/timezone; trimmed, case-consistent text; junk placeholders (`"NULL"`, `"N/A"`, `"-"`, `""`) → real null; sane numeric precision; booleans from varied forms (Y/N, 1/0); soft-deleted rows filtered, bookkeeping columns dropped. 
+**Just apply** (safe, universal — already your default): consistent timestamps/timezone; trimmed, case-consistent text; junk placeholders (`"NULL"`, `"N/A"`, `"-"`, `""`) → real null; sane numeric precision; booleans from varied forms (Y/N, 1/0). **Notice and surface** (the answer depends on their business): @@ -197,4 +197,4 @@ A scan-list, not a pipeline — and the governing rule is **surface what you fin - **Missing data** — random vs. systematic? Surface the pattern; never silently impute or default. - **Free text / mixed encodings** — handle the safe parts, flag the rest. -Already covered by the rules above, listed so they stay on your radar: structural reshaping (decode/JSON/multi-value), orphans & key validity (Phase 0 step 5 + the post-run check), and recording meanings (the descriptions step). +Already covered by the rules above, listed so they stay on your radar: structural reshaping (decode/JSON/multi-value), orphans & key validity (Phase 0 step 5 + the post-run check), filtering soft-deletes & dropping bookkeeping columns (Phase 4's **Decoded** step), and recording meanings (the descriptions step). 
From 83282c21332bafb631e44f48365ff7ef81ecc3ec Mon Sep 17 00:00:00 2001 From: Timothy Dean Date: Tue, 2 Jun 2026 15:52:09 -0600 Subject: [PATCH 21/31] Possible pretty-print transform fix --- skill-data/transform/SKILL.md | 66 +++++++++++++++++------------------ 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/skill-data/transform/SKILL.md b/skill-data/transform/SKILL.md index 5ef21c3..6a445c7 100644 --- a/skill-data/transform/SKILL.md +++ b/skill-data/transform/SKILL.md @@ -24,28 +24,23 @@ For an **MBQL 5** `source.query` (`lib/type: "mbql/query"`), the body shape, the ## Create + run (native SQL) ```bash -cat > ./.scratch/transform.json <<'EOF' -{ - "name": "user_counts_by_signup_year", - "description": "Sample transform: counts users by year of signup", - "source": { - "type": "query", - "query": { - "type": "native", - "database": , - "native": { - "query": "SELECT date_trunc('year', created_at)::date AS signup_year, COUNT(*)::int AS user_count FROM public.users GROUP BY 1 ORDER BY 1" - } - } - }, - "target": { - "type": "table", - "database": , - "schema": "public", - "name": "user_counts_by_signup_year" - } -} -EOF +# Author the SQL formatted — it's what `mb transform get` and the Metabase editor show. +cat > ./.scratch/user_counts_by_signup_year.sql <<'SQL' +SELECT + date_trunc('year', created_at)::date AS signup_year, + COUNT(*)::int AS user_count +FROM public.users +GROUP BY 1 +ORDER BY 1 +SQL + +# Embed it with jq --rawfile so the newlines survive as \n in valid JSON (don't hand-write the SQL as one line). 
+jq -n --rawfile q ./.scratch/user_counts_by_signup_year.sql \ + '{ name: "user_counts_by_signup_year", + description: "Sample transform: counts users by year of signup", + source: { type: "query", query: { type: "native", database: , native: { query: $q } } }, + target: { type: "table", database: , schema: "public", name: "user_counts_by_signup_year" } }' \ + > ./.scratch/transform.json TRANSFORM_ID=$(mb transform create --file ./.scratch/transform.json --profile --json | jq -r '.id') mb transform run "$TRANSFORM_ID" --wait --profile --json @@ -58,7 +53,7 @@ Notes: - `--wait` on `transform run` polls until status is `succeeded` or `failed`. Without it you only get `{message: "Transform run started", run_id, final: null}` and have to poll yourself. - `--sync` implies `--wait`, then waits until the run's output table is registered — the run registers it itself, no `db sync-schema` needed — adding `target_table_id` to the envelope. Use it when you'll build MBQL on the output (see "Inspect"). - The `--json` envelope is shape-stable: `{message, run_id, final}` (plus `target_table_id` under `--sync` — a number, or `null` if the table didn't register before the timeout). `final` is `null` when `--wait` is omitted or the run never started, otherwise a full `TransformRun` object with `status` and `message`. On a failed run (`final.status` ∈ {`failed`, `timeout`, `canceled`}) the CLI exits 1 and writes a one-line summary `transform run failed` to stderr; the failure detail lives only in `final.message` on stdout, so `jq -r '.final.message'` is where to look. -- The heredoc with single-quoted `'EOF'` prevents shell from interpolating any `$vars` inside the SQL. +- **Keep the SQL formatted.** Author it multi-line in `./.scratch/.sql` and embed with `jq --rawfile` (jq ≥1.6, which JSON-encodes the file so newlines become `\n`). 
The stored `native.query` is what `mb transform get` and the Metabase editor render — a single-line blob is valid JSON but unreadable when anyone opens the transform. Single-quote the heredoc delimiter (`<<'SQL'`) so the shell leaves `$vars` in the query alone (e.g. Postgres `$1`, `$$`). - `transform create --json` returns the agent-facing compact projection: `{id, name, description, source_type, target: {type, database, schema, name}, target_db_id}`. Read `target.schema`/`target.name` directly off the create output — no follow-up `transform get` needed to verify where the transform will write. - If a transform with the same `name` already has a YAML representation on disk under the configured remote-sync repo, `create` mints a `_2` suffix on the exported filename (the new transform gets a fresh `entity_id`; the prior one isn't touched). For "iterate on the same concept" workflows, prefer `transform update ` — see "Iterating on a failing transform" below. - **`collection_id` only accepts a collection in the `:transforms` namespace.** Transforms aren't filed next to cards and dashboards — passing a normal analytics collection id (the kind a dashboard lives in) fails create/update with `collection_id: A Transform can only go in Collections in the :transforms namespace.` Omit `collection_id` to leave the transform uncollected (the common case), or create one with `mb collection create --body '{"name":"…"}' --namespace transforms --json` and pass the returned `id`. 
Cards and dashboards you build **on top of** the transform's output table go in ordinary collections as usual — so "put the transform and its dashboard in collection X" generally means _X holds the dashboard + cards; the transform stays in the transforms namespace._ @@ -130,12 +125,14 @@ Right shape — patch only what changes: # Rename only: mb transform update --body '{"name":"renamed"}' --profile --json -# Rewrite the SQL only: -cat > ./.scratch/patch.json <<'EOF' -{ "source": { "type": "query", "query": { "type": "native", - "database": , - "native": { "query": "SELECT … FROM public.orders" } } } } -EOF +# Rewrite the SQL only — author it formatted, embed with jq: +cat > ./.scratch/orders.sql <<'SQL' +SELECT … +FROM public.orders +SQL +jq -n --rawfile q ./.scratch/orders.sql \ + '{ source: { type: "query", query: { type: "native", database: , native: { query: $q } } } }' \ + > ./.scratch/patch.json mb transform update --file ./.scratch/patch.json --profile --json # Change tag membership (note: tag_ids, not tags): @@ -168,11 +165,12 @@ mb transform run "$ID" --wait --profile --json # → failed # 2. Fix the body in place; PATCH only what changed. # Source-only patch — keeps name, target, tags untouched on the server. -cat > ./.scratch/source-patch.json <<'EOF' -{ "source": { "type": "query", "query": { "type": "native", - "database": , - "native": { "query": "" } } } } -EOF +cat > ./.scratch/source.sql <<'SQL' + +SQL +jq -n --rawfile q ./.scratch/source.sql \ + '{ source: { type: "query", query: { type: "native", database: , native: { query: $q } } } }' \ + > ./.scratch/source-patch.json mb transform update "$ID" --file ./.scratch/source-patch.json --profile --json # 3. 
Re-run From 830f6abb3cc6f9e5b895956c8d0bfa218ad8f33e Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 16:44:27 -0600 Subject: [PATCH 22/31] Promote robot-data-engineer as the front-door entrypoint The whole-journey router was buried at the bottom of core's specialized-skills list, ranked as a peer of git-sync/mbql, and the autoloaded discovery stub only pointed at core. An outcome-seeking user ("make sense of my data", "build a dashboard") had no direct path to the router that's meant to run first. - Stub: add a journey-intent fast path straight to `mb skills get robot-data-engineer` before loading the dense core ref. - Core: hoist robot-data-engineer to the top of the list with a "start here for anything bigger than one command" lead-in, add data-analysis to its routing targets, drop the "name TBD" marker. - README: drop "name TBD" from the bundled-skills table. --- README.md | 2 +- skill-data/core/SKILL.md | 4 +++- skills/metabase-cli/SKILL.md | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8ed76ac..4154a3d 100644 --- a/README.md +++ b/README.md @@ -1344,7 +1344,7 @@ Bundled skills: | `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | | `data-transformation` | Raw, normalized source database → clean, wide, analysis-ready tables for a non-technical user | | `semantic-layer` | Turning clean tables into reusable segments, measures, and metrics for a non-technical user | -| `robot-data-engineer` | Front-door router for the whole journey (raw → tables → definitions → dashboards); name TBD | +| `robot-data-engineer` | Front-door router for the whole journey (raw → tables → definitions → dashboards) | | `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | | `git-sync` | Round-tripping Metabase content to/from a git remote | diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index bddc3e8..5e3c34e 
100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -145,12 +145,14 @@ Routine verb shapes (list / get / create / update), every flag, and output JSON This core file is enough for any single-command task. Load the relevant skill **proactively** when intent matches — don't wing an MBQL body, a transform body, or the git-sync workflow from this overview alone. Load via `mb skills get `. +**Start here for anything bigger than one command.** If the user wants an outcome rather than a single verb — "make sense of my data", "build a data model", "go from raw data to a dashboard", "be my data analyst", "set up analytics for X", "answer questions about my data" — load `robot-data-engineer` first and let it route. The rest of this list is the toolbox it routes into. + +- **`robot-data-engineer`** — the front-door router for the whole journey (raw data → clean tables → reusable definitions → dashboards or written answers) for a non-technical user. Detects where the user is, sets up auth and autonomy once, and routes to `data-transformation` / `semantic-layer` / `visualization` / `data-analysis`. Load this when the user describes a goal, not a step. - **`mbql`** — authoring or fixing any MBQL query body: `mb query`, a card `dataset_query`, a transform `source.query`, a measure/segment `definition`, "aggregate and group by", reading `--dry-run` errors. The query-body reference. - **`viz`** — choosing a card's `display` and authoring `visualization_settings`: "make it a bar chart", "set the pie dimension/metric", "format this column as currency", "the card renders as a table instead of a chart". The presentation counterpart to `mbql`. - **`transform`** — "create a transform", "run a transform", authoring transform body JSON, run inspection. 
- **`data-transformation`** — the higher-level workflow: turning a raw, normalized source database into a small set of clean, wide, analysis-ready tables for a non-technical user — "clean up", "flatten", "denormalize", "make sense of this database", "build analysis-ready tables". Wraps `transform` (the mechanics) with the investigate → propose → build flow. - **`semantic-layer`** — turning clean tables into reusable definitions: "make this filter reusable", "define active customers / net revenue / MRR officially", "create a segment / measure / metric", "so everyone uses the same definition". Builds on `mbql` (the definition bodies) and `transform` (widen a table first when a definition needs more than one). -- **`robot-data-engineer`** — the front-door router for the whole journey (raw data → clean tables → reusable definitions → dashboards) for a non-technical user: "make sense of my data", "build a data model", "go from raw data to a dashboard", "be my data analyst". Detects where the user is and routes to `data-transformation` / `semantic-layer` / `visualization`. (Working title — name TBD.) - **`git-sync`** — "import the latest changes", "export to git", "git sync", "dirty check", "stash before pulling". If a task spans more than one, load each. Specialized skills assume the conventions above and won't repeat them. `mb skills list` enumerates everything on the installed version. 
diff --git a/skills/metabase-cli/SKILL.md b/skills/metabase-cli/SKILL.md index f463d41..eabc850 100644 --- a/skills/metabase-cli/SKILL.md +++ b/skills/metabase-cli/SKILL.md @@ -19,3 +19,9 @@ Before running any `mb` command, load the workflow content from the CLI: mb skills get core # auth, flag conventions, every command group mb skills list # everything available on the installed version ``` + +**Doing a whole job, not one command?** If the user wants an outcome — "make sense of my data", "build a data model", "go from raw data to a dashboard", "answer questions about my data", "be my data analyst", "set up analytics for X" — load the front-door router instead and let it drive: + +```bash +mb skills get robot-data-engineer +``` From e420a65fd68f28b8e2efbb13a9e0e64f72c2cb79 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 16:47:05 -0600 Subject: [PATCH 23/31] Clear skillsaw warnings: bump skill token limit, drop hedge - context-budget warn 5100 -> 6000 (data-transformation's honest floor grew to ~5,805 tokens). - Reword 'flag it when appropriate' -> 'flag it on sight' to drop the hedging the content-weak-language rule flags. --- .skillsaw.yaml | 4 ++-- skill-data/data-transformation/SKILL.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.skillsaw.yaml b/.skillsaw.yaml index 53d5328..d87361d 100644 --- a/.skillsaw.yaml +++ b/.skillsaw.yaml @@ -15,9 +15,9 @@ rules: # always-resident skills (core, robot-data-engineer) are the tightest # of the set; the larger ones (data-transformation, semantic-layer, # mbql) are leaf skills loaded for a single stage, where the extra - # tokens are genuine guidance, not fluff. 5100 clears the largest + # tokens are genuine guidance, not fluff. 6000 clears the largest # honest floor while still catching real future bloat. 
skill: - warn: 5100 + warn: 6000 skill-description: warn: 200 diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md index 7630137..84f6a5c 100644 --- a/skill-data/data-transformation/SKILL.md +++ b/skill-data/data-transformation/SKILL.md @@ -49,7 +49,7 @@ Sort every choice into one of these. 5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. 6. Never drop columns that link things together. Every table keeps its own id **and** the ids tying it to other tables — alongside the readable labels you copy in, not instead of. The label is for reading; the id is for joining. You're building tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — dropped ids make that quietly impossible and the user will regret it. Keep the ids; don't force the user to stare at them. 7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets, which row is the "current" one, what "active" means — say it back in one plain sentence and get a yes/no first. You know only the columns; they know the business. Wrong rules hide insidiously in clean-looking tables. ("I'm treating each person's most recent sign-up as their current one — right?") -8. Never sneak sensitive personal data through. Flag it when appropriate — addresses, phone numbers, emails, IPs, financial, etc. — and ask the user how to handle it (the prudential call below). Always surface, never silently expose it in a table others will browse. +8. Never sneak sensitive personal data through. Flag it on sight — addresses, phone numbers, emails, IPs, financial, etc. — and ask the user how to handle it (the prudential call below). Always surface, never silently expose it in a table others will browse. 9. 
Never overwrite existing tables or other transforms' outputs. Before building, check the target name is unused (`mb transform list`, `mb table list`); if it's in use, stop and surface it — building over it silently destroys their data. Reuse names only for updating _your own_ transform (`transform update`), never for clobbering another. **Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: From 9c370353b1076e995f59e0024d3d98e9e49c2eaf Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 16:51:44 -0600 Subject: [PATCH 24/31] Release 0.1.11 Ships the robot-data-engineer entrypoint promotion. release.yml auto-publishes on push to main only when package.json's version is not yet on npm, so the bump is required for the skill changes to reach installed CLIs via mb skills get. --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index a65ec96..41f93bb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@metabase/cli", - "version": "0.1.10", + "version": "0.1.11", "description": "Metabase CLI", "license": "AGPL-3.0", "repository": { From 89f23befd68e65e85c7335632c9d6522eb60b332 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 16:58:26 -0600 Subject: [PATCH 25/31] Lead marketplace plugin description with the data-analyst journey An unaware user describes a goal ('make sense of my data', 'be my data analyst'), and Claude matches it against the plugin description to decide relevance. The old description was CRUD/CLI-only, so a journey-shaped request matched nothing. Lead with the journey trigger phrases (mirrored from robot-data-engineer); keep CRUD + git-sync as the second half. 
--- .claude-plugin/marketplace.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 90e6f1a..0af6cbb 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ "plugins": [ { "name": "metabase-cli", - "description": "Drive a Metabase instance from the terminal via the `mb` CLI: auth, list/get/create/update/delete on every resource, run queries and transforms, git-sync content to and from a remote. Bundles transform and git-sync references as on-demand skills served by `mb skills get`.", + "description": "Be your data analyst / data engineer for Metabase, from the terminal via the `mb` CLI. Go from raw data to something a non-technical person can use: clean tables, reusable metrics, dashboards, and written answers. Use when someone wants to \"make sense of my data\", \"build a data model\", \"go from raw data to a dashboard\", \"answer questions about my data\", \"report on who registered / signed up / responded\", \"analyze X\", or \"set up analytics for X\". Also full CRUD on every Metabase resource (cards, dashboards, transforms, queries), git-sync content to and from a remote, and on-demand workflow skills served by `mb skills get`.", "source": "./", "strict": false, "skills": ["./skills/metabase-cli"], From 8e687ba7bd9092e4065336fabd5157d4addedd0c Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Tue, 2 Jun 2026 16:59:45 -0600 Subject: [PATCH 26/31] Drop generic 'analyze X' trigger from plugin description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'analyze X' over-triggers — it matches any analysis request (logs, code, a CSV, an image), not just Metabase data work. The remaining data-anchored phrases ('make sense of my data', 'answer questions about my data', 'report on who registered', 'set up analytics for X') already cover the intent without the false positives. 
--- .claude-plugin/marketplace.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 0af6cbb..e44b5e6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ "plugins": [ { "name": "metabase-cli", - "description": "Be your data analyst / data engineer for Metabase, from the terminal via the `mb` CLI. Go from raw data to something a non-technical person can use: clean tables, reusable metrics, dashboards, and written answers. Use when someone wants to \"make sense of my data\", \"build a data model\", \"go from raw data to a dashboard\", \"answer questions about my data\", \"report on who registered / signed up / responded\", \"analyze X\", or \"set up analytics for X\". Also full CRUD on every Metabase resource (cards, dashboards, transforms, queries), git-sync content to and from a remote, and on-demand workflow skills served by `mb skills get`.", + "description": "Be your data analyst / data engineer for Metabase, from the terminal via the `mb` CLI. Go from raw data to something a non-technical person can use: clean tables, reusable metrics, dashboards, and written answers. Use when someone wants to \"make sense of my data\", \"build a data model\", \"go from raw data to a dashboard\", \"answer questions about my data\", \"report on who registered / signed up / responded\", or \"set up analytics for X\". 
Also full CRUD on every Metabase resource (cards, dashboards, transforms, queries), git-sync content to and from a remote, and on-demand workflow skills served by `mb skills get`.", "source": "./", "strict": false, "skills": ["./skills/metabase-cli"], From dd75b07016bf0113b6f151d1a815c59cce5c0e0d Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Wed, 3 Jun 2026 09:53:03 -0600 Subject: [PATCH 27/31] Fix skills e2e: add data-analysis to bundled skill list The data-analysis skill was added to skill-data/ but the e2e test's BUNDLED_VISIBLE_NAMES still listed nine skills, so list/path/get-all assertions and the unknown-skill 'available' message failed across all E2E matrix lanes. --- tests/e2e/skills.e2e.test.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/e2e/skills.e2e.test.ts b/tests/e2e/skills.e2e.test.ts index a6b8c47..13249cd 100644 --- a/tests/e2e/skills.e2e.test.ts +++ b/tests/e2e/skills.e2e.test.ts @@ -9,6 +9,7 @@ import { cleanupConfigHome, mkTempConfigHome, runCli } from "./run-cli"; const BUNDLED_VISIBLE_NAMES = [ "core", + "data-analysis", "data-transformation", "document", "git-sync", @@ -32,7 +33,7 @@ describe("skills e2e", () => { return dir; } - it("list returns the nine bundled non-hidden skills, sorted by name", async () => { + it("list returns the ten bundled non-hidden skills, sorted by name", async () => { const result = await runCli({ args: ["skills", "list", "--json"], configHome: await makeIsolatedConfigHome(), @@ -123,7 +124,7 @@ describe("skills e2e", () => { expect(result.exitCode).toBe(2); expect(result.stderr).toContain( - "unknown skill name(s): does-not-exist (available: core, document, git-sync, mbql, transform, visualization)", + "unknown skill name(s): does-not-exist (available: core, data-analysis, data-transformation, document, git-sync, mbql, robot-data-engineer, semantic-layer, transform, visualization)", ); }); From a181d3ed408d81880b0ff09e27da35cd1f07dc59 Mon Sep 17 00:00:00 2001 From: Timothy Dean 
Date: Wed, 3 Jun 2026 09:57:34 -0600 Subject: [PATCH 28/31] Tighten robot-data-engineer scope --- skill-data/robot-data-engineer/SKILL.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md index ecdbf2e..1530350 100644 --- a/skill-data/robot-data-engineer/SKILL.md +++ b/skill-data/robot-data-engineer/SKILL.md @@ -6,16 +6,19 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Robot Data Engineer -You're the front door, not the worker. Point the user at the right tool and get out of the way. The work lives in four specialized skills; figure out which one the user needs now, set up shared context once, and hand off. The moment you know which skill to load, load it and let it drive. +You're the front door, not the worker. Point the user at the right tools and get out of the way. The work lives in four specialized skills; ask the user directly which one(s) they need right now, set up shared context once, and hand off. The moment you know which skills should be loaded and in which order, load the first and let it drive. -The four stages: +The three stages: 1. **Raw data → clean tables** — `data-transformation`. Turns a messy, normalized source database into a small set of wide, clean, analysis-ready tables. 2. **Clean tables → reusable definitions** — `semantic-layer`. Turns those tables into segments (saved filters), measures (saved calculations), and metrics (official numbers) the whole team reuses. -3. **Tables/definitions → charts and dashboards** — `visualization`. Builds the questions and dashboards people look at. -4. **Clean tables → answers and reports** — `data-analysis`. Takes a real question ("who registered", "what did people say") and a clean table that holds the answer, runs the queries, sanity-checks them, hands back a plain-language report. +3. 
**Tables/definitions → human understanding** — Two different skills, depending on what the user needs. + A. Charts and dashboards? `visualization`. Builds the questions and dashboards people look at. + B. Plain-language analysis? `data-analysis`. Given a user's question, this queries the clean data, sanity-checks and analyzes the results, and hands back a plain-language report. -Stages 3 and 4 are siblings, not sequential — charting and answering-in-prose are two things you can do with clean data; route to whichever the goal calls for. Users describe a goal, not a stage. Map the goal to a stage, confirm, and route. +Stages 3A and 3B are alternatives, not sequential steps: answering-in-prose and charting are two different things you can do with clean data; route to whichever the goal calls for. Users describe a goal, not a stage. Map the goal to a stage, confirm, and route. + +In some cases, the user will want to do all of 1-3 sequentially; in other cases, just one or two of the stages. --- From b38bf256606a311de9e7fa0119bbc66644c8e868 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Wed, 3 Jun 2026 10:18:37 -0600 Subject: [PATCH 29/31] Fix card e2e: tolerate version-dependent bad-Database-ID error v58-61 leak the app-DB constraint (NULL not allowed for column "DATABASE_ID"); head validates at the query layer first (missing or invalid Database ID). Accept either exact substring. --- tests/e2e/card.e2e.test.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/e2e/card.e2e.test.ts b/tests/e2e/card.e2e.test.ts index fbfacd0..14136df 100644 --- a/tests/e2e/card.e2e.test.ts +++ b/tests/e2e/card.e2e.test.ts @@ -375,10 +375,14 @@ describe("card e2e", () => { }); // Pre-flight is bypassed; the server then rejects the malformed body with an HttpError (exit 1). - // The card-create endpoint surfaces the underlying app-DB constraint message via the response - // envelope; we assert a stable substring of that surfaced error. 
+ // The surfaced message for the bad Database ID is version-dependent: v58-61 leak the app-DB + // constraint, while head validates at the query layer first. Accept either exact substring. expect(result.exitCode).toBe(1); - expect(cliErrorMessage(result.stderr)).toContain('NULL not allowed for column "DATABASE_ID"'); + const surfaced = cliErrorMessage(result.stderr); + const rejectedBadDatabaseId = + surfaced.includes('NULL not allowed for column "DATABASE_ID"') || + surfaced.includes("missing or invalid Database ID (:database)"); + expect(rejectedBadDatabaseId).toBe(true); expect(result.stdout).toBe(""); }); From 4657be2ce0f7c52b19318d690d4cf7630dca0b92 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Wed, 3 Jun 2026 10:32:00 -0600 Subject: [PATCH 30/31] Fix card update e2e: tolerate version-dependent PUT validation head validates dataset_query at the query layer (exit 1, missing or invalid Database ID); v58-61 accept it as an opaque map (exit 0). Assert the pre-flight bypass instead of a fixed server outcome. --- tests/e2e/card.e2e.test.ts | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/e2e/card.e2e.test.ts b/tests/e2e/card.e2e.test.ts index 14136df..7479e91 100644 --- a/tests/e2e/card.e2e.test.ts +++ b/tests/e2e/card.e2e.test.ts @@ -522,11 +522,19 @@ describe("card e2e", () => { env: authEnv(), }); - // PUT /api/card/:id accepts dataset_query as an opaque map and does not validate its inner - // shape, so the bad `database` does not trigger a 400. Bypass is proven by exit 0 — without - // --skip-validate the prior test shows pre-flight rejects with exit 2. - expect(result.exitCode, result.stderr).toBe(0); - expect(parseJson(result.stdout, CardCompact).id).toBe(SEEDED.ordersCardId); + // Bypass is proven by the absence of the CLI pre-flight: exit code is never 2 and the MBQL 5 + // validation message never fires (without --skip-validate the prior test shows exit 2). 
What the + // server then does with the bad `database` is its own authority and is version-dependent: v58-61 + // accept dataset_query as an opaque map (exit 0, card returned), while head validates at the query + // layer and rejects it (exit 1, "missing or invalid Database ID"). + expect(result.exitCode).not.toBe(2); + expect(result.stderr).not.toContain("card.dataset_query validation failed"); + if (result.exitCode === 0) { + expect(parseJson(result.stdout, CardCompact).id).toBe(SEEDED.ordersCardId); + } else { + expect(result.exitCode).toBe(1); + expect(cliErrorMessage(result.stderr)).toContain("missing or invalid Database ID (:database)"); + } }); it("create with dataset_query: {} is rejected at the CLI boundary (no H2 stack trace)", async () => { From 58fa27aae9ba5e13c5969a3268cd0a394b9fc094 Mon Sep 17 00:00:00 2001 From: Bryan Maass Date: Wed, 3 Jun 2026 10:40:42 -0600 Subject: [PATCH 31/31] format test files --- tests/e2e/card.e2e.test.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/e2e/card.e2e.test.ts b/tests/e2e/card.e2e.test.ts index 7479e91..4ab591d 100644 --- a/tests/e2e/card.e2e.test.ts +++ b/tests/e2e/card.e2e.test.ts @@ -533,7 +533,9 @@ describe("card e2e", () => { expect(parseJson(result.stdout, CardCompact).id).toBe(SEEDED.ordersCardId); } else { expect(result.exitCode).toBe(1); - expect(cliErrorMessage(result.stderr)).toContain("missing or invalid Database ID (:database)"); + expect(cliErrorMessage(result.stderr)).toContain( + "missing or invalid Database ID (:database)", + ); } });