diff --git a/go/cmd/contract.go b/go/cmd/contract.go index 9489b61..92224b0 100644 --- a/go/cmd/contract.go +++ b/go/cmd/contract.go @@ -533,6 +533,79 @@ func runContractCreateCopilot(client *api.Client, dataset, outFile string, noWai return nil } +// runContractCreateCopilotBulk submits one GenerateContract operation for N +// qualifiedNames, polls a single status, then fetches each contract and writes +// it to its own file (named via outFiles[qualifiedName]). Per-dataset failures +// are logged as warnings and don't abort the rest. Returns the list of files +// successfully written. +func runContractCreateCopilotBulk(client *api.Client, qualifiedNames []string, outFiles map[string]string, noWait bool) ([]string, error) { + if len(qualifiedNames) == 0 { + return nil, nil + } + opID, err := client.GenerateContract(api.GenerateContractRequest{ + DatasetQualifiedNames: qualifiedNames, + }) + if err != nil { + return nil, err + } + + if noWait { + fmt.Printf(" %s AI contract generation started for %d datasets.\n", output.Green.Render("✓"), len(qualifiedNames)) + fmt.Println(output.Dim.Render(" Running in background — contracts will appear in Soda Cloud when ready.")) + fmt.Println(output.Dim.Render(" Check results: sodacli results list")) + return nil, nil + } + + spinner := output.NewSpinner(fmt.Sprintf("Generating AI contracts for %d datasets...", len(qualifiedNames))) + spinner.Start() + + elapsed := 0 + for { + time.Sleep(3 * time.Second) + elapsed += 3 + status, err := client.GetGenerateStatus(opID) + if err != nil { + spinner.Stop() + return nil, err + } + if status.State == "completed" { + break + } + if status.State == "failed" || status.State == "canceled" { + spinner.Stop() + return nil, output.Errorf(2, "AI generation %s", status.State) + } + spinner.SetMessage(fmt.Sprintf("Generating AI contracts for %d datasets... (%ds)", len(qualifiedNames), elapsed)) + } + spinner.Stop() + + written := make([]string, 0, len(qualifiedNames)) + for _, qn := range qualifiedNames { + contract, err := client.FindContractByDataset(qn) + if err != nil { + fmt.Fprintf(os.Stderr, " %s [%s] could not fetch contract: %v\n", output.Yellow.Render("⚠"), qn, err) + continue + } + if contract == nil { + fmt.Fprintf(os.Stderr, " %s [%s] AI generation completed but contract was not persisted.\n", output.Yellow.Render("⚠"), qn) + continue + } + outFile := outFiles[qn] + if outFile == "" { + outFile = datasetFileName(qn) + } + if err := os.WriteFile(outFile, []byte(contract.Contents), 0644); err != nil { + fmt.Fprintf(os.Stderr, " %s [%s] could not write file: %v\n", output.Yellow.Render("⚠"), qn, err) + continue + } + written = append(written, outFile) + } + if len(written) > 0 { + output.PrintSuccess(fmt.Sprintf("Wrote %d AI-generated contract(s).", len(written)), GCtx) + } + return written, nil +} + // ── contract copilot ────────────────────────────────────────────────────────── var contractCopilotCmd = &cobra.Command{ diff --git a/go/cmd/dataset.go b/go/cmd/dataset.go index 3f8dc94..81a757a 100644 --- a/go/cmd/dataset.go +++ b/go/cmd/dataset.go @@ -523,6 +523,8 @@ var datasetDiagnosticsCmd = &cobra.Command{ noCollectResults, _ := cmd.Flags().GetBool("no-collect-results") collectFailedRows, _ := cmd.Flags().GetBool("collect-failed-rows") noCollectFailedRows, _ := cmd.Flags().GetBool("no-collect-failed-rows") + uniqueKeys, _ := cmd.Flags().GetStringSlice("unique-keys") + hasUniqueKeys := cmd.Flags().Changed("unique-keys") // flags not yet in the public API — fail fast with a clear message unsupportedFlags := []string{"schema", "table-prefix", "table-suffix", "failed-rows-description", "expose-failed-rows-query", "no-expose-failed-rows-query", "failed-rows-cta", "no-failed-rows-cta"} @@ -538,7 +540,7 @@ var datasetDiagnosticsCmd = &cobra.Command{ } // no flags → show current settings - if !collectResults && !noCollectResults && !collectFailedRows && !noCollectFailedRows { + if !collectResults && !noCollectResults && !collectFailedRows && !noCollectFailedRows && !hasUniqueKeys { result, err := client.GetDatasetDiagnostics(args[0]) if err != nil { return err @@ -569,6 +571,9 @@ var datasetDiagnosticsCmd = &cobra.Command{ if result.FailedRowsConfiguration.State != "" { fmt.Printf(" %-28s %s\n", output.Bold.Render("State"), result.FailedRowsConfiguration.State) } + if len(result.FailedRowsConfiguration.UniqueKeyColumnNames) > 0 { + fmt.Printf(" %-28s %s\n", output.Bold.Render("Unique key columns"), strings.Join(result.FailedRowsConfiguration.UniqueKeyColumnNames, ", ")) + } } return nil } @@ -579,9 +584,27 @@ var datasetDiagnosticsCmd = &cobra.Command{ enabled := collectResults cfg.ScanAndResultsConfiguration = &api.DiagnosticsScanConfig{Enabled: &enabled} } - if collectFailedRows || noCollectFailedRows { - enabled := collectFailedRows - cfg.FailedRowsConfiguration = &api.DiagnosticsFailedRowsConfig{Enabled: &enabled} + if collectFailedRows || noCollectFailedRows || hasUniqueKeys { + // Seed from current state — the API replaces the whole + // failedRowsConfiguration object, so untouched fields would be reset. + current, err := client.GetDatasetDiagnostics(args[0]) + if err != nil { + return err + } + fr := &api.DiagnosticsFailedRowsConfig{} + if current.FailedRowsConfiguration != nil { + enabled := current.FailedRowsConfiguration.Enabled + fr.Enabled = &enabled + fr.UniqueKeyColumnNames = current.FailedRowsConfiguration.UniqueKeyColumnNames + } + if collectFailedRows || noCollectFailedRows { + enabled := collectFailedRows + fr.Enabled = &enabled + } + if hasUniqueKeys { + fr.UniqueKeyColumnNames = uniqueKeys + } + cfg.FailedRowsConfiguration = fr } if _, err := client.UpdateDatasetDiagnostics(args[0], cfg); err != nil { @@ -781,6 +804,7 @@ func init() { datasetDiagnosticsCmd.Flags().Bool("no-collect-results", false, "Disable storing check results and scan history") datasetDiagnosticsCmd.Flags().Bool("collect-failed-rows", false, "Store failed rows") datasetDiagnosticsCmd.Flags().Bool("no-collect-failed-rows", false, "Disable storing failed rows") + datasetDiagnosticsCmd.Flags().StringSlice("unique-keys", nil, "Unique key columns for failed rows collection (comma-separated or repeated)") datasetDiagnosticsCmd.Flags().String("table-prefix", "", "Prefix for diagnostic table names") datasetDiagnosticsCmd.Flags().String("table-suffix", "", "Suffix for diagnostic table names") datasetDiagnosticsCmd.Flags().String("failed-rows-description", "", "Description for failed rows storage context") diff --git a/go/cmd/dataset_onboard.go b/go/cmd/dataset_onboard.go index 09df6c8..061e863 100644 --- a/go/cmd/dataset_onboard.go +++ b/go/cmd/dataset_onboard.go @@ -12,20 +12,63 @@ import ( "github.com/soda-data-inc/soda-cli/internal/output" ) +// datasetInfo carries everything we need about each dataset between resolve +// and execute phases. +type datasetInfo struct { + ID string + Name string + QualifiedName string // canonical "datasource/db/schema/table" + Onboarded bool + DatasourceID string // populated only when promotion is needed +} + var datasetOnboardCmd = &cobra.Command{ - Use: "onboard ", - Short: "Guided setup: enable monitors, profiling and contracts for a dataset", - Long: `Set up a dataset with default monitors, profiling and optionally generate a contract. + Use: "onboard [dataset-id]", + Short: "Guided setup: enable monitors, profiling and contracts for one or more datasets", + Long: `Set up one or more datasets with default monitors, profiling and optionally generate contracts. + +Single-dataset mode walks through interactive prompts: + + sodacli dataset onboard + +Bulk mode (multiple datasets via --dataset, repeatable) requires non-interactive flags: -Interactive mode walks through each step. Use flags for CI/CD or AI agents: + sodacli dataset onboard --dataset --dataset \ + --monitoring --no-profiling --contracts copilot - sodacli dataset onboard --monitoring --profiling --contracts skeleton`, - Args: cobra.ExactArgs(1), +Failed-rows collection (--collect-failed-rows / --unique-keys) is only supported +in single-dataset mode, since unique keys are dataset-specific.`, + Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - datasetID := args[0] + // ── Collect dataset IDs (positional + repeatable --dataset) ───────── + ids := []string{} + if len(args) == 1 && strings.TrimSpace(args[0]) != "" { + ids = append(ids, strings.TrimSpace(args[0])) + } + extra, _ := cmd.Flags().GetStringArray("dataset") + for _, e := range extra { + if e = strings.TrimSpace(e); e != "" { + ids = append(ids, e) + } + } + seen := map[string]bool{} + dedup := ids[:0] + for _, id := range ids { + if !seen[id] { + seen[id] = true + dedup = append(dedup, id) + } + } + ids = dedup + if len(ids) == 0 { + return output.Errorf(2, "at least one dataset ID is required (positional or --dataset)") + } + bulk := len(ids) > 1 + hasMonitoring := cmd.Flags().Changed("monitoring") || cmd.Flags().Changed("no-monitoring") hasProfiling := cmd.Flags().Changed("profiling") || cmd.Flags().Changed("no-profiling") hasContracts := cmd.Flags().Changed("contracts") + hasFailedRows := cmd.Flags().Changed("collect-failed-rows") || cmd.Flags().Changed("no-collect-failed-rows") || cmd.Flags().Changed("unique-keys") noInteractive := GCtx.NoInteractive || (hasMonitoring && hasProfiling && hasContracts) enableMonitoring, _ := cmd.Flags().GetBool("monitoring") @@ -33,35 +76,138 @@ Interactive mode walks through each step. Use flags for CI/CD or AI agents: enableProfiling, _ := cmd.Flags().GetBool("profiling") noProfiling, _ := cmd.Flags().GetBool("no-profiling") contractsMode, _ := cmd.Flags().GetString("contracts") + enableCollectFailedRows, _ := cmd.Flags().GetBool("collect-failed-rows") + uniqueKeys, _ := cmd.Flags().GetStringSlice("unique-keys") + + // Bulk-mode constraints + if bulk { + if !hasMonitoring || !hasProfiling || !hasContracts { + return output.Errorf(2, "bulk mode (multiple datasets) requires --monitoring/--no-monitoring, --profiling/--no-profiling, and --contracts copilot|skeleton|none") + } + if hasFailedRows { + return output.Errorf(2, "--collect-failed-rows / --unique-keys are not supported in bulk mode (run dataset onboard one at a time for failed-rows setup, since unique keys are dataset-specific)") + } + } client, err := newAPIClient() if err != nil { return err } - // Validate dataset exists - fmt.Println(output.Dim.Render(" Checking dataset...")) - datasets, err := client.ListDatasets(api.ListDatasetsParams{Size: 500}) - if err != nil { - return err + // ── Resolve all dataset IDs ───────────────────────────────────────── + fmt.Println(output.Dim.Render(fmt.Sprintf(" Checking %d dataset(s)...", len(ids)))) + infoByID := make(map[string]*datasetInfo, len(ids)) + for _, id := range ids { + infoByID[id] = &datasetInfo{ID: id} } - var datasetName string - var qualifiedName string - for _, d := range datasets.Content { - if d.ID == datasetID { - datasetName = d.Name - qualifiedName = d.Datasource.Name + "/" + strings.ReplaceAll(d.QualifiedName, ".", "/") + + // Sweep already-onboarded datasets via paginated ListDatasets. + unresolved := len(ids) + page := 0 + for unresolved > 0 { + datasets, err := client.ListDatasets(api.ListDatasetsParams{Size: 500, Page: page}) + if err != nil { + return err + } + for _, d := range datasets.Content { + if i, ok := infoByID[d.ID]; ok && !i.Onboarded { + i.Name = d.Name + i.QualifiedName = d.Datasource.Name + "/" + strings.ReplaceAll(d.QualifiedName, ".", "/") + i.Onboarded = true + unresolved-- + } + } + if datasets.Last || len(datasets.Content) == 0 { break } + page++ } - if datasetName == "" { - return output.Errorf(2, "dataset '%s' not found", datasetID) + + // Anything still unresolved → look across discovered datasets per datasource. + if unresolved > 0 { + dsPage, dsErr := client.ListDatasources(0, 500) + if dsErr != nil { + return dsErr + } + for _, ds := range dsPage.Content { + if unresolved == 0 { + break + } + discPage, discErr := client.ListDiscoveredDatasets(ds.ID, 0, 500) + if discErr != nil { + continue + } + for i := range discPage.Content { + d := &discPage.Content[i] + if info, ok := infoByID[d.ID]; ok && !info.Onboarded && info.DatasourceID == "" { + info.DatasourceID = ds.ID + info.Name = d.Name + unresolved-- + } + } + } } - fmt.Printf(" Dataset: %s\n\n", output.Bold.Render(datasetName)) - // ── Determine settings ────────────────────────────────────────────── + var notFound []string + for _, id := range ids { + if !infoByID[id].Onboarded && infoByID[id].DatasourceID == "" { + notFound = append(notFound, id) + } + } + if len(notFound) > 0 { + return output.Errorf(2, "dataset(s) not found: %s", strings.Join(notFound, ", ")) + } - if !hasMonitoring && !hasProfiling && !hasContracts { + // ── Promote any not-yet-onboarded datasets, batched per datasource ── + toPromoteByDS := map[string][]string{} + for _, id := range ids { + if !infoByID[id].Onboarded { + toPromoteByDS[infoByID[id].DatasourceID] = append(toPromoteByDS[infoByID[id].DatasourceID], id) + } + } + if len(toPromoteByDS) > 0 { + n := 0 + for _, v := range toPromoteByDS { + n += len(v) + } + fmt.Println(output.Dim.Render(fmt.Sprintf(" Onboarding %d discovered dataset(s)...", n))) + for dsID, idList := range toPromoteByDS { + if err := client.OnboardDiscoveredDatasets(dsID, api.OnboardDatasetsRequest{ + DiscoveredDatasetIDs: idList, + }); err != nil { + return err + } + } + // Re-fetch each via the standard endpoint so qualifiedName matches + // the format used by the already-onboarded path (DiscoveredDataset + // includes the datasource prefix; Dataset does not). + for _, id := range ids { + if infoByID[id].Onboarded { + continue + } + detail, err := client.GetDataset(id) + if err != nil { + return err + } + infoByID[id].Name = detail.Name + infoByID[id].QualifiedName = detail.Datasource.Name + "/" + strings.ReplaceAll(detail.QualifiedName, ".", "/") + infoByID[id].Onboarded = true + } + } + + // Print resolved datasets + if bulk { + fmt.Printf(" Datasets (%d):\n", len(ids)) + for _, id := range ids { + fmt.Printf(" • %s\n", infoByID[id].Name) + } + fmt.Println() + } else { + fmt.Printf(" Dataset: %s\n\n", output.Bold.Render(infoByID[ids[0]].Name)) + } + + // ── Determine settings (interactive form only valid for single-dataset) ── + if !bulk && !hasMonitoring && !hasProfiling && !hasContracts && !hasFailedRows { if noInteractive { return output.Errorf(2, "flags required in non-interactive mode: --monitoring/--no-monitoring, --profiling/--no-profiling, --contracts copilot|skeleton|none") } @@ -69,39 +215,67 @@ Interactive mode walks through each step. Use flags for CI/CD or AI agents: monitoringChoice := "yes" profilingChoice := "yes" contractChoice := "none" + failedRowsChoice := "no" + uniqueKeysInput := "" - form := huh.NewForm(huh.NewGroup( - huh.NewSelect[string](). - Title("Enable default metric monitoring?"). - Description("Row count, row count change, freshness, schema changes,\npartition row count, most recent timestamp."). - Options( - huh.NewOption("Yes", "yes"), - huh.NewOption("No", "no"), - ). - Value(&monitoringChoice), - huh.NewSelect[string](). - Title("Enable dataset profiling?"). - Description("Column stats, row counts, and data type distribution."). - Options( - huh.NewOption("Yes", "yes"), - huh.NewOption("No", "no"), - ). - Value(&profilingChoice), - huh.NewSelect[string](). - Title("Set up a data contract?"). - Options( - huh.NewOption("AI-generated contract (Copilot)", "copilot"), - huh.NewOption("Skeleton contract (empty template)", "skeleton"), - huh.NewOption("No contract", "none"), - ). - Value(&contractChoice), - )) + form := huh.NewForm( + huh.NewGroup( + huh.NewSelect[string](). + Title("Enable default metric monitoring?"). + Description("Row count, row count change, freshness, schema changes,\npartition row count, most recent timestamp."). + Options( + huh.NewOption("Yes", "yes"), + huh.NewOption("No", "no"), + ). + Value(&monitoringChoice), + huh.NewSelect[string](). + Title("Enable dataset profiling?"). + Description("Column stats, row counts, and data type distribution."). + Options( + huh.NewOption("Yes", "yes"), + huh.NewOption("No", "no"), + ). + Value(&profilingChoice), + huh.NewSelect[string](). + Title("Set up a data contract?"). + Options( + huh.NewOption("AI-generated contract (Copilot)", "copilot"), + huh.NewOption("Skeleton contract (empty template)", "skeleton"), + huh.NewOption("No contract", "none"), + ). + Value(&contractChoice), + huh.NewSelect[string](). + Title("Enable failed rows collection?"). + Description("Store rows that fail checks in the diagnostics warehouse.\nRequires unique key columns."). + Options( + huh.NewOption("Yes", "yes"), + huh.NewOption("No", "no"), + ). + Value(&failedRowsChoice), + ), + huh.NewGroup( + huh.NewInput(). + Title("Unique key columns"). + Description("Comma-separated list, e.g. id,customer_email"). + Value(&uniqueKeysInput), + ).WithHideFunc(func() bool { + return failedRowsChoice != "yes" + }), + ) if err := form.Run(); err != nil { return output.Errorf(2, "onboarding cancelled") } enableMonitoring = monitoringChoice == "yes" enableProfiling = profilingChoice == "yes" contractsMode = contractChoice + enableCollectFailedRows = failedRowsChoice == "yes" + if enableCollectFailedRows { + for _, k := range strings.Split(uniqueKeysInput, ",") { + if k = strings.TrimSpace(k); k != "" { + uniqueKeys = append(uniqueKeys, k) + } + } + } } else { if noMonitoring { enableMonitoring = false @@ -112,11 +286,19 @@ Interactive mode walks through each step. Use flags for CI/CD or AI agents: if contractsMode == "" { contractsMode = "none" } + // Treat --unique-keys alone as implicit --collect-failed-rows (single-mode only). + if !bulk && len(uniqueKeys) > 0 { + enableCollectFailedRows = true + } + } + + if enableCollectFailedRows && len(uniqueKeys) == 0 { + return output.Errorf(2, "--unique-keys is required when --collect-failed-rows is set (failed rows collection won't work without unique key columns)") } // ── Execute ───────────────────────────────────────────────────────── - // Step 1: Monitoring + Profiling + // Step 1: Monitoring + Profiling (per-dataset API call) if enableMonitoring || enableProfiling { label := "" switch { @@ -128,38 +310,68 @@ Interactive mode walks through each step. Use flags for CI/CD or AI agents: label = "Enabling dataset profiling..." } fmt.Println(output.Dim.Render(" " + label)) - if err := client.EnableDatasetDefaults(datasetID, enableMonitoring, enableProfiling); err != nil { - fmt.Fprintf(os.Stderr, " %s Could not enable settings: %v\n", output.Yellow.Render("⚠"), err) - } else { + var hadErr bool + for _, id := range ids { + if err := client.EnableDatasetDefaults(id, enableMonitoring, enableProfiling); err != nil { + fmt.Fprintf(os.Stderr, " %s [%s] %v\n", output.Yellow.Render("⚠"), infoByID[id].Name, err) + hadErr = true + } + } + if !hadErr { fmt.Println(output.Green.Render(" ✓") + " " + label[:len(label)-3] + "d.") } } else { fmt.Println(output.Dim.Render(" Skipping monitoring and profiling setup.")) } - // Step 2: Contracts - var contractFile string + // Step 2: Failed rows (single-mode only — bulk-mode constraint above blocks this) + if enableCollectFailedRows { + id := ids[0] + fmt.Println(output.Dim.Render(" Enabling failed rows collection...")) + enabled := true + cfg := api.DiagnosticsWarehouseConfig{ + ScanAndResultsConfiguration: &api.DiagnosticsScanConfig{Enabled: &enabled}, + FailedRowsConfiguration: &api.DiagnosticsFailedRowsConfig{ + Enabled: &enabled, + UniqueKeyColumnNames: uniqueKeys, + }, + } + if _, err := client.UpdateDatasetDiagnostics(id, cfg); err != nil { + fmt.Fprintf(os.Stderr, " %s Could not enable failed rows collection: %v\n", output.Yellow.Render("⚠"), err) + if isNotEnabledOnDatasource(err) { + fmt.Fprintf(os.Stderr, " %s\n", output.Dim.Render("Set up the diagnostics warehouse on the datasource first:")) + fmt.Fprintf(os.Stderr, " %s\n", output.Dim.Render(" sodacli datasource diagnostics --enable")) + } + } else { + fmt.Println(output.Green.Render(" ✓") + fmt.Sprintf(" Failed rows collection enabled (keys: %s).", strings.Join(uniqueKeys, ", "))) + } + } + + // Step 3: Contracts + var contractFiles []string switch contractsMode { case "copilot": - if qualifiedName == "" { - fmt.Fprintf(os.Stderr, " %s Cannot generate AI contract: dataset qualified name not available.\n", output.Yellow.Render("⚠")) + qns := make([]string, 0, len(ids)) + outFiles := make(map[string]string, len(ids)) + for _, id := range ids { + qn := infoByID[id].QualifiedName + qns = append(qns, qn) + outFiles[qn] = datasetFileName(qn) + } + files, err := runContractCreateCopilotBulk(client, qns, outFiles, false) + if err != nil { + fmt.Fprintf(os.Stderr, " %s Contract generation failed: %v\n", output.Yellow.Render("⚠"), err) } else { - outFile := datasetFileName(qualifiedName) - if err := runContractCreateCopilot(client, qualifiedName, outFile, false); err != nil { - fmt.Fprintf(os.Stderr, " %s Contract generation failed: %v\n", output.Yellow.Render("⚠"), err) - } else { - contractFile = outFile - } + contractFiles = append(contractFiles, files...) } case "skeleton": - if qualifiedName == "" { - fmt.Fprintf(os.Stderr, " %s Cannot generate skeleton contract: dataset qualified name not available.\n", output.Yellow.Render("⚠")) - } else { - outFile := datasetFileName(qualifiedName) - if err := runContractCreateSkeleton(client, qualifiedName, outFile); err != nil { - fmt.Fprintf(os.Stderr, " %s Contract generation failed: %v\n", output.Yellow.Render("⚠"), err) + for _, id := range ids { + qn := infoByID[id].QualifiedName + outFile := datasetFileName(qn) + if err := runContractCreateSkeleton(client, qn, outFile); err != nil { + fmt.Fprintf(os.Stderr, " %s [%s] Skeleton generation failed: %v\n", output.Yellow.Render("⚠"), infoByID[id].Name, err) } else { - contractFile = outFile + contractFiles = append(contractFiles, outFile) } } case "none": @@ -168,17 +380,23 @@ Interactive mode walks through each step. Use flags for CI/CD or AI agents: return output.Errorf(2, "unknown contracts mode '%s' — use copilot, skeleton, or none", contractsMode) } - // Step 3: Verify contract - if contractFile != "" { + // Step 4: Verify contracts + if len(contractFiles) > 0 { fmt.Println() - fmt.Println(output.Dim.Render(" Verifying contract...")) - if err := runContractVerify(client, contractFile, false); err != nil { - fmt.Fprintf(os.Stderr, " %s Verification failed: %v\n", output.Yellow.Render("⚠"), err) + fmt.Println(output.Dim.Render(fmt.Sprintf(" Verifying %d contract(s)...", len(contractFiles)))) + for _, f := range contractFiles { + if err := runContractVerify(client, f, false); err != nil { + fmt.Fprintf(os.Stderr, " %s [%s] Verification failed: %v\n", output.Yellow.Render("⚠"), f, err) + } } } fmt.Println() - output.PrintSuccess(fmt.Sprintf("Dataset '%s' onboarding complete.", datasetName), GCtx) + if bulk { + output.PrintSuccess(fmt.Sprintf("Onboarded %d datasets.", len(ids)), GCtx) + } else { + output.PrintSuccess(fmt.Sprintf("Dataset '%s' onboarding complete.", infoByID[ids[0]].Name), GCtx) + } return nil }, } @@ -194,11 +412,15 @@ func datasetFileName(qualifiedName string) string { } func init() { + datasetOnboardCmd.Flags().StringArray("dataset", nil, "Additional dataset ID to onboard (repeatable, enables bulk mode)") datasetOnboardCmd.Flags().Bool("monitoring", false, "Enable default metric monitors") datasetOnboardCmd.Flags().Bool("no-monitoring", false, "Skip monitoring setup") datasetOnboardCmd.Flags().Bool("profiling", false, "Enable dataset profiling") datasetOnboardCmd.Flags().Bool("no-profiling", false, "Skip profiling setup") datasetOnboardCmd.Flags().String("contracts", "", "Generate contract: copilot|skeleton|none") + datasetOnboardCmd.Flags().Bool("collect-failed-rows", false, "Enable failed rows collection (single-dataset only; requires --unique-keys)") + datasetOnboardCmd.Flags().Bool("no-collect-failed-rows", false, "Skip failed rows collection setup") + datasetOnboardCmd.Flags().StringSlice("unique-keys", nil, "Unique key columns for failed rows collection (single-dataset only; comma-separated or repeated)") datasetCmd.AddCommand(datasetOnboardCmd) } diff --git a/go/internal/api/datasets.go b/go/internal/api/datasets.go index c46aa6d..93d5b42 100644 --- a/go/internal/api/datasets.go +++ b/go/internal/api/datasets.go @@ -142,9 +142,10 @@ type DiagnosticsWarehouseResult struct { } type DiagnosticsFailedRowsResult struct { - Enabled bool `json:"enabled"` - MaxRowCount int `json:"maxRowCount"` - State string `json:"state"` + Enabled bool `json:"enabled"` + MaxRowCount int `json:"maxRowCount"` + State string `json:"state"` + UniqueKeyColumnNames []string `json:"uniqueKeyColumnNames"` } type DiagnosticsScanResult struct { @@ -165,7 +166,8 @@ func (c *Client) GetDatasetDiagnostics(datasetID string) (*DiagnosticsWarehouseR // POST /api/v1/datasets/{id}/diagnosticsWarehouse request type DiagnosticsFailedRowsConfig struct { - Enabled *bool `json:"enabled,omitempty"` + Enabled *bool `json:"enabled,omitempty"` + UniqueKeyColumnNames []string `json:"uniqueKeyColumnNames,omitempty"` } type DiagnosticsScanConfig struct {