Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions docs/imports.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
---
written_by: ai
---

# Imports

Imports project an external contact graph into the same markdown shape
Expand Down Expand Up @@ -101,6 +105,54 @@ Same shape as birdclaw, but reads from
[discrawl](https://github.com/steipete/discrawl)'s SQLite cache. Discord
handles land under `accounts.discord`.

## Crawler Contacts

```bash
clawdex import contacts --from telecrawl --dry-run
clawdex import contacts --from wacrawl --dry-run
clawdex import contacts --from /path/to/crawler --dry-run
```

Reads a local crawler's crawlkit metadata and runs its advertised
`contact-export` command. This is the v0 machine contract for source crawler
contacts:

- metadata schema is `crawlkit.control.v1`
- command name is `contact-export`
- command is read-only and advertises `json: true`
- advertised `argv` includes `--json` plus any source-safe flags
- payload root is `contacts`
- each contact has only `display_name` and `phone_numbers`

Source crawlers own source-native extraction and privacy filtering. Clawdex
owns canonical people, markdown storage, matching, and human edits.

Crawler contact imports match existing people by source accounts, external IDs,
emails, or normalized phone numbers. They do not automatically merge by name
alone; a matching display name without a matching phone is treated as a new
person for now instead of risking a bad join.

If one exported crawler contact contains a phone already owned by a different
person, clawdex leaves that conflicting phone off the matched person instead of
creating an automatic cross-person join.

When a crawler contact matches an existing person, clawdex records that source
under the person's local markdown frontmatter:

```yaml
sources:
  telecrawl:
    names: ["Ada Example"]
    phones: ["15550100"]
  wacrawl:
    names: ["Ada Example"]
    phones: ["+1 555 0100"]
```

That source evidence is local-only and stable across repeated imports. It lets
clawdex answer that a person was seen in Telegram or WhatsApp even when the
incoming phone number was already present and no canonical phone field changed.

## Sync (preview-only)

```bash
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/openclaw/clawdex

go 1.26.3
go 1.26.4

require (
github.com/alecthomas/kong v1.15.0
Expand Down
117 changes: 117 additions & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package cli

import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"slices"
"sort"
"strings"
"time"
Expand All @@ -16,13 +19,15 @@ import (
"github.com/openclaw/clawdex/internal/apple"
"github.com/openclaw/clawdex/internal/avatar"
"github.com/openclaw/clawdex/internal/birdclaw"
"github.com/openclaw/clawdex/internal/contactexport"
"github.com/openclaw/clawdex/internal/discrawl"
"github.com/openclaw/clawdex/internal/google"
"github.com/openclaw/clawdex/internal/index"
"github.com/openclaw/clawdex/internal/markdown"
"github.com/openclaw/clawdex/internal/model"
"github.com/openclaw/clawdex/internal/repo"
"github.com/openclaw/clawdex/internal/vcard"
"github.com/openclaw/crawlkit/control"
)

var Version = "dev"
Expand Down Expand Up @@ -414,10 +419,122 @@ func (c *SearchCmd) Run(r *Runtime) error {
// ImportCmd groups the `import` subcommands. Each field is tagged `cmd` and
// carries its own help text; the field's type implements that subcommand.
type ImportCmd struct {
Apple ImportAppleCmd `cmd:"" help:"Import Apple Contacts into local markdown"`
Birdclaw ImportBirdclawCmd `cmd:"" help:"Import X/Twitter DM contacts from local birdclaw archive"`
// Contacts imports from any crawler that advertises a contact-export command.
Contacts ImportContactsCmd `cmd:"" help:"Import contacts from a source crawler"`
Google ImportGoogleCmd `cmd:"" help:"Import Google Contacts into local markdown"`
Discrawl ImportDiscrawlCmd `cmd:"" help:"Import Discord DM contacts from local discrawl archive"`
}

// ImportContactsCmd holds the flags for `import contacts`, which imports
// contacts exported by a source crawler binary.
type ImportContactsCmd struct {
// From is the required --from flag. Run hands it to readCrawlerContacts,
// which executes it as the crawler command.
From string `name:"from" help:"Crawler binary to import contacts from" required:""`
}

// Run reads contacts from the crawler named by c.From, passes them to the
// store's ImportCrawlerContacts together with the runtime's dry-run flag and
// the current time, and prints the resulting changes. Any error from reading
// or importing is returned unchanged.
func (c *ImportContactsCmd) Run(r *Runtime) error {
	src, exported, readErr := readCrawlerContacts(r.ctx, c.From)
	if readErr != nil {
		return readErr
	}

	result, importErr := r.store.ImportCrawlerContacts(src, exported, r.root.DryRun, time.Now())
	if importErr != nil {
		return importErr
	}

	return r.print(result)
}

// readCrawlerContacts loads the crawler's manifest, validates its advertised
// contact-export command, runs that command, and decodes its output.
//
// The command must be present in the manifest, advertise JSON output, be
// non-mutating, and have a non-empty argv. It returns the source name (the
// trimmed manifest ID, or the base name of binary when the ID is blank) and the
// contacts converted by sourceContactsFromExport. On any failure the returned
// source is "" and the contacts are nil.
func readCrawlerContacts(ctx context.Context, binary string) (string, []model.SourceContact, error) {
	manifest, err := readCrawlerManifest(ctx, binary)
	if err != nil {
		return "", nil, err
	}

	// Reject the command before executing anything if it breaks the contract.
	spec, advertised := manifest.Commands["contact-export"]
	switch {
	case !advertised:
		return "", nil, fmt.Errorf("%s metadata does not advertise contact-export", binary)
	case !spec.JSON:
		return "", nil, fmt.Errorf("%s contact-export must advertise json output", binary)
	case spec.Mutates:
		return "", nil, fmt.Errorf("%s contact-export must be read-only", binary)
	case len(spec.Argv) == 0:
		return "", nil, fmt.Errorf("%s contact-export argv is empty", binary)
	}

	argv, err := contactExportArgv(binary, spec.Argv)
	if err != nil {
		return "", nil, err
	}

	var errOut bytes.Buffer
	child := exec.CommandContext(ctx, argv[0], argv[1:]...) // #nosec G204 -- argv comes from the local crawler manifest and is executed without a shell.
	child.Stderr = &errOut
	payload, err := child.Output()
	if err != nil {
		// Append the crawler's stderr to the error when it wrote any.
		if detail := strings.TrimSpace(errOut.String()); detail != "" {
			return "", nil, fmt.Errorf("%s contact-export failed: %w: %s", binary, err, detail)
		}
		return "", nil, fmt.Errorf("%s contact-export failed: %w", binary, err)
	}

	export, err := contactexport.Decode(bytes.NewReader(payload))
	if err != nil {
		return "", nil, fmt.Errorf("%s contact-export decode failed: %w", binary, err)
	}

	source := strings.TrimSpace(manifest.ID)
	if source == "" {
		source = filepath.Base(binary)
	}
	return source, sourceContactsFromExport(source, export), nil
}

// contactExportArgv builds the argv used to run a crawler's contact-export
// command from the argv the crawler advertised.
//
// It returns an error when advertised is empty, when the base name of
// advertised[0] differs from the base name of binary, or when "--json" does
// not appear anywhere in advertised. On success it returns a copy of
// advertised whose first element is replaced by binary; the caller's slice is
// left unmodified.
func contactExportArgv(binary string, advertised []string) ([]string, error) {
	if len(advertised) == 0 {
		return nil, fmt.Errorf("%s contact-export argv is empty", binary)
	}

	want, got := filepath.Base(binary), filepath.Base(advertised[0])
	switch {
	case want != "" && got != "" && want != got:
		return nil, fmt.Errorf("%s contact-export argv starts with %q, want %q", binary, advertised[0], want)
	case !slices.Contains(advertised, "--json"):
		return nil, fmt.Errorf("%s contact-export argv must include --json", binary)
	}

	// Run the binary the user asked for, not whatever path the manifest names.
	argv := make([]string, len(advertised))
	copy(argv, advertised)
	argv[0] = binary
	return argv, nil
}

// readCrawlerManifest runs `<binary> --json metadata`, decodes its standard
// output as a control.Manifest, and checks that the manifest's trimmed
// schema_version equals control.SchemaVersion.
//
// When the command fails, any text it wrote to stderr is appended to the
// returned error. Every failure path returns the zero control.Manifest.
func readCrawlerManifest(ctx context.Context, binary string) (control.Manifest, error) {
	var none control.Manifest

	var errOut bytes.Buffer
	probe := exec.CommandContext(ctx, binary, "--json", "metadata") // #nosec G204 -- binary is an explicit local crawler command.
	probe.Stderr = &errOut
	raw, err := probe.Output()
	if err != nil {
		if detail := strings.TrimSpace(errOut.String()); detail != "" {
			return none, fmt.Errorf("%s metadata failed: %w: %s", binary, err, detail)
		}
		return none, fmt.Errorf("%s metadata failed: %w", binary, err)
	}

	var manifest control.Manifest
	if err := json.Unmarshal(raw, &manifest); err != nil {
		return none, fmt.Errorf("%s metadata decode failed: %w", binary, err)
	}

	// Compare the trimmed version, but report the value exactly as received.
	if got := strings.TrimSpace(manifest.SchemaVersion); got != control.SchemaVersion {
		return none, fmt.Errorf("%s metadata schema_version = %q, want %q", binary, manifest.SchemaVersion, control.SchemaVersion)
	}
	return manifest, nil
}

// sourceContactsFromExport converts each exported contact into a
// model.SourceContact tagged with source. Every phone number becomes a
// model.ContactValue carrying the same source, and the first number of each
// contact is marked Primary. A contact with no phone numbers keeps a nil
// Phones value.
func sourceContactsFromExport(source string, export contactexport.ContactExport) []model.SourceContact {
	out := make([]model.SourceContact, 0, len(export.Contacts))
	for _, exported := range export.Contacts {
		// Left nil on purpose so contacts without phones are not given an empty list.
		var phones []model.ContactValue
		for idx, number := range exported.PhoneNumbers {
			phones = append(phones, model.ContactValue{
				Value:   number,
				Source:  source,
				Primary: idx == 0,
			})
		}
		out = append(out, model.SourceContact{
			Source: source,
			Name:   exported.DisplayName,
			Phones: phones,
		})
	}
	return out
}

type ImportAppleCmd struct {
Input string `name:"input" help:"JSON/NDJSON contact file instead of macOS Contacts"`
Avatars bool `name:"avatars" help:"Import local avatar thumbnails"`
Expand Down
Loading