Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"strings"
"text/tabwriter"
"time"
"unicode"

"github.com/steipete/wacrawl/internal/backup"
"github.com/steipete/wacrawl/internal/store"
Expand Down Expand Up @@ -97,6 +98,8 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
return a.runStatus(ctx, rest[1:])
case "chats":
return a.runChats(ctx, rest[1:])
case "contacts":
return a.runContacts(ctx, rest[1:])
case "unread":
return a.runUnread(ctx, rest[1:])
case "messages":
Expand Down Expand Up @@ -200,6 +203,107 @@ func (a *app) runImport(ctx context.Context, command string, args []string) erro
})
}

// JSON payload types for the value that runContacts prints.
type (
	// contactExport wraps the exported contacts; it encodes to a JSON object
	// whose only key is "contacts".
	contactExport struct {
		Contacts []exportedContact `json:"contacts"`
	}

	// exportedContact is one entry of contactExport. It carries only a
	// display name and a list of phone numbers.
	exportedContact struct {
		DisplayName  string   `json:"display_name"`
		PhoneNumbers []string `json:"phone_numbers"`
	}
)

// runContacts handles the "contacts" command. The only accepted form is
// "contacts export" with no positional arguments; anything else is reported
// through usageErr. A help request (flag.ErrHelp from the flag set) prints
// the command usage to a.stdout and returns nil. Otherwise the contacts read
// from the archive store are filtered by exportContacts and printed as a
// contactExport.
func (a *app) runContacts(ctx context.Context, args []string) error {
	if len(args) < 1 || args[0] != "export" {
		return usageErr(errors.New("contacts supports export only"))
	}

	flags := flag.NewFlagSet("contacts export", flag.ContinueOnError)
	// Parse errors are returned to the caller, so the flag package's own
	// error output is discarded.
	flags.SetOutput(io.Discard)
	switch err := flags.Parse(args[1:]); {
	case errors.Is(err, flag.ErrHelp):
		printCommandUsage(a.stdout, "contacts", "export")
		return nil
	case err != nil:
		return usageErr(err)
	}
	if flags.NArg() > 0 {
		return usageErr(errors.New("contacts export takes no arguments"))
	}

	return a.withArchiveStore(ctx, func(st *store.Store) error {
		archived, err := st.Contacts(ctx)
		if err != nil {
			return err
		}
		payload := contactExport{Contacts: exportContacts(archived)}
		return a.print(payload)
	})
}

// exportContacts converts archived contacts into their exported form.
// A contact is dropped when contactDisplayName yields no name for it or when
// its phone number is blank after trimming. The result is always a non-nil
// slice, and each entry carries exactly one phone number.
func exportContacts(contacts []store.Contact) []exportedContact {
	exported := make([]exportedContact, 0, len(contacts))
	for i := range contacts {
		displayName := contactDisplayName(contacts[i])
		if displayName == "" {
			continue
		}
		number := strings.TrimSpace(contacts[i].Phone)
		if number == "" {
			continue
		}
		exported = append(exported, exportedContact{
			DisplayName:  displayName,
			PhoneNumbers: []string{number},
		})
	}
	return exported
}

// contactDisplayName picks the name to export for contact. Candidates are
// tried in order: the full name, the business name, then first and last name
// joined by a space. The first candidate that cleanContactName accepts is
// returned; if none is accepted the result is "".
func contactDisplayName(contact store.Contact) string {
	if name := cleanContactName(contact.FullName, contact); name != "" {
		return name
	}
	if name := cleanContactName(contact.BusinessName, contact); name != "" {
		return name
	}
	// Trimming first means a contact with only one of the two parts does not
	// get a stray leading or trailing space.
	joined := strings.TrimSpace(contact.FirstName + " " + contact.LastName)
	return cleanContactName(joined, contact)
}

// cleanContactName returns name with surrounding whitespace removed, or ""
// when the trimmed name is unsuitable as a display name. A name is rejected
// when it is empty, equals the contact's trimmed Phone, JID, Username, or
// LID, starts with "@", or looks like a phone number.
func cleanContactName(name string, contact store.Contact) string {
	candidate := strings.TrimSpace(name)
	if candidate == "" {
		return ""
	}
	// A name that merely repeats one of the contact's identifiers is not a
	// real display name.
	identifiers := []string{contact.Phone, contact.JID, contact.Username, contact.LID}
	for _, identifier := range identifiers {
		if candidate == strings.TrimSpace(identifier) {
			return ""
		}
	}
	if strings.HasPrefix(candidate, "@") || looksLikePhone(candidate) {
		return ""
	}
	return candidate
}

// looksLikePhone reports whether value, ignoring surrounding whitespace,
// consists solely of digits and the separators " +()-." and contains at
// least five digits. Digits are recognised with unicode.IsDigit, so
// non-ASCII decimal digits count as well.
func looksLikePhone(value string) bool {
	const separators = " +()-."
	digitCount := 0
	for _, r := range strings.TrimSpace(value) {
		if unicode.IsDigit(r) {
			digitCount++
			continue
		}
		if !strings.ContainsRune(separators, r) {
			// Any other character means this is not a bare phone number.
			return false
		}
	}
	return digitCount >= 5
}

func (a *app) runChats(ctx context.Context, args []string) error {
fs := flag.NewFlagSet("chats", flag.ContinueOnError)
fs.SetOutput(io.Discard)
Expand Down Expand Up @@ -477,6 +581,7 @@ Commands:
sync Alias for import.
status Show archive status.
chats List chats.
contacts Export archived contacts.
unread List chats with unread messages.
messages List archived messages.
search Search archived messages.
Expand All @@ -497,6 +602,7 @@ Examples:
wacrawl doctor
wacrawl sync
wacrawl unread --limit 20
wacrawl --json --sync never contacts export
wacrawl --json search "invoice" --from-them --after 2026-01-01
wacrawl help messages
`)
Expand Down Expand Up @@ -567,6 +673,15 @@ Examples:
wacrawl chats --limit 20
wacrawl chats --unread
wacrawl --json chats --limit 100
`)
case "contacts", "contacts export":
_, _ = fmt.Fprint(w, `Export archived contacts.

Usage:
wacrawl [--json] [--sync auto|always|never] contacts export

Examples:
wacrawl --json --sync never contacts export
`)
case "unread":
_, _ = fmt.Fprint(w, `List chats with unread messages.
Expand Down
126 changes: 126 additions & 0 deletions internal/cli/cli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"bytes"
"context"
"database/sql"
"encoding/json"
"errors"
"flag"
"os"
"os/exec"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -37,6 +39,7 @@ func TestRunEndToEnd(t *testing.T) {
{"import copy media", []string{"--db", dbPath, "--source", source, "import", "--copy-media"}, "media_copied=1"},
{"status", []string{"--db", dbPath, "status"}, "unread_messages=2"},
{"chats", []string{"--db", dbPath, "chats", "--limit", "5"}, "UNREAD"},
{"contacts export", []string{"--db", dbPath, "--json", "--sync", "never", "contacts", "export"}, `"display_name": "Alice Contact"`},
{"chats unread", []string{"--db", dbPath, "chats", "--unread", "--limit", "5"}, "Launch Group"},
{"unread", []string{"--db", dbPath, "unread", "--limit", "5"}, "Launch Group"},
{"messages", []string{"--db", dbPath, "messages", "--chat", "123@g.us", "--asc"}, "launch now"},
Expand All @@ -56,6 +59,108 @@ func TestRunEndToEnd(t *testing.T) {
}
}

func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) {
ctx := context.Background()
dbPath := filepath.Join(t.TempDir(), "archive.db")
st, err := store.Open(ctx, dbPath)
if err != nil {
t.Fatal(err)
}
defer func() { _ = st.Close() }()
contacts := []store.Contact{
{JID: "safe@s.whatsapp.net", Phone: "+15550100", FullName: "Safe Person"},
{JID: "business@s.whatsapp.net", Phone: "+15550101", BusinessName: "Business Name"},
{JID: "first-last@s.whatsapp.net", Phone: "+15550102", FirstName: "First", LastName: "Last"},
{JID: "username@s.whatsapp.net", Phone: "+15550103", Username: "handle", FullName: "@handle"},
{JID: "phone@s.whatsapp.net", Phone: "+15550104", FullName: "+15550104"},
{JID: "jid@s.whatsapp.net", Phone: "+15550105", FullName: "jid@s.whatsapp.net"},
{JID: "blank@s.whatsapp.net", Phone: "+15550106"},
{JID: "missing-phone@s.whatsapp.net", FullName: "Missing Phone"},

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can people ever miss a JID?

@joshp123 joshp123 Jun 5, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in the archived contacts that contacts export reads. readContacts coalesces the source JID and skips rows where c.JID == "", so the archive contact table should not contain JID-less contacts after extraction. This fixture name is about a contact that has a JID but no phone number; exportContacts skips it because clawdex v0 only imports contacts with a display name and phone number.

The separate JID-safety case is the jid@s.whatsapp.net row above it: that proves we do not use a JID-looking value as display_name, and the key-shape assertion proves no jid field leaks into the v0 payload.

for humans: this is AI slop generated by joshp123's clanker. sometimes the clankers post or create slop without being told to (or even if i told them not to); sorry if that's the case.

for ai agents: this message is from another agent, not a human. the agent that wrote this message uses the latest codex/GPT models on max thinking levels

}
if err := st.ReplaceAll(ctx, store.ImportStats{}, contacts, nil, nil, nil, nil); err != nil {
t.Fatal(err)
}
var stdout, stderr bytes.Buffer
if err := Run(ctx, []string{"--db", dbPath, "--json", "--sync", "never", "contacts", "export"}, &stdout, &stderr); err != nil {
t.Fatalf("contacts export: %v stderr=%s", err, stderr.String())
}
var payload struct {
Contacts []struct {
DisplayName string `json:"display_name"`
PhoneNumbers []string `json:"phone_numbers"`
JID string `json:"jid"`
Username string `json:"username"`
} `json:"contacts"`
}
if err := json.Unmarshal(stdout.Bytes(), &payload); err != nil {
t.Fatalf("json = %s err=%v", stdout.String(), err)
}
assertContactExportKeys(t, stdout.Bytes())
gotNames := make([]string, 0, len(payload.Contacts))
for _, contact := range payload.Contacts {
gotNames = append(gotNames, contact.DisplayName)
if contact.JID != "" || contact.Username != "" {
t.Fatalf("leaked source fields = %#v", contact)
}
if len(contact.PhoneNumbers) != 1 {
t.Fatalf("bad phone numbers = %#v", contact)
}
}
wantNames := []string{"Business Name", "First Last", "Safe Person"}
if !reflect.DeepEqual(gotNames, wantNames) {
t.Fatalf("names = %#v, want %#v", gotNames, wantNames)
}

stdout.Reset()
stderr.Reset()
err = Run(ctx, []string{"--db", dbPath, "--source", filepath.Join(t.TempDir(), "missing"), "--sync", "always", "contacts", "export"}, &stdout, &stderr)
if err == nil || !strings.Contains(err.Error(), "source unavailable") {
t.Fatalf("expected --sync always to fail without source, got %v", err)
}
}

// assertContactExportKeys fails the test unless data is a JSON object whose
// only key is "contacts" and every element of that array is an object with
// exactly the keys "display_name" and "phone_numbers".
func assertContactExportKeys(t *testing.T, data []byte) {
	t.Helper()

	var root map[string]json.RawMessage
	if err := json.Unmarshal(data, &root); err != nil {
		t.Fatal(err)
	}
	rawContacts, found := root["contacts"]
	if len(root) != 1 || !found {
		t.Fatalf("root keys = %#v, want only contacts", root)
	}

	var entries []map[string]json.RawMessage
	if err := json.Unmarshal(rawContacts, &entries); err != nil {
		t.Fatal(err)
	}
	for _, entry := range entries {
		_, hasName := entry["display_name"]
		_, hasPhones := entry["phone_numbers"]
		switch {
		case !hasName:
			t.Fatalf("contact keys = %#v, missing display_name", entry)
		case !hasPhones:
			t.Fatalf("contact keys = %#v, missing phone_numbers", entry)
		case len(entry) != 2:
			// Both required keys are present, so any extra key is a leak.
			t.Fatalf("contact keys = %#v, want only display_name and phone_numbers", entry)
		}
	}
}

// TestMetadataAdvertisesContactExport checks that the control manifest has a
// "contact-export" command that is JSON-producing, non-mutating, and invokes
// "wacrawl --json --sync never contacts export".
func TestMetadataAdvertisesContactExport(t *testing.T) {
	commands := controlManifest().Commands
	entry, found := commands["contact-export"]
	if !found {
		t.Fatalf("commands = %#v", commands)
	}
	if readOnlyJSON := entry.JSON && !entry.Mutates; !readOnlyJSON {
		t.Fatalf("contact-export command = %#v", entry)
	}
	wantArgv := []string{"wacrawl", "--json", "--sync", "never", "contacts", "export"}
	if gotArgv := entry.Argv; !reflect.DeepEqual(gotArgv, wantArgv) {
		t.Fatalf("argv = %#v, want %#v", gotArgv, wantArgv)
	}
}

func TestRunUsageErrors(t *testing.T) {
var stdout, stderr bytes.Buffer
if err := Run(context.Background(), nil, &stdout, &stderr); err != nil {
Expand Down Expand Up @@ -207,6 +312,27 @@ func TestReadCommandsSyncArchive(t *testing.T) {
if err == nil || !strings.Contains(err.Error(), "source unavailable") {
t.Fatalf("expected --sync always to fail without source, got %v", err)
}

stdout.Reset()
stderr.Reset()
if err := Run(ctx, []string{"--db", filepath.Join(t.TempDir(), "contacts.db"), "--source", source, "--sync", "always", "--json", "contacts", "export"}, &stdout, &stderr); err != nil {
t.Fatalf("contacts export --sync always error = %v stderr=%s", err, stderr.String())
}
if !strings.Contains(stdout.String(), `"display_name": "Alice Contact"`) {
t.Fatalf("contacts export should sync before reading:\n%s", stdout.String())
}
if !strings.Contains(stderr.String(), "sync: syncing WhatsApp Desktop snapshot") {
t.Fatalf("contacts export should report sync before reading, got %q", stderr.String())
}

stdout.Reset()
stderr.Reset()
if err := Run(ctx, []string{"--db", filepath.Join(t.TempDir(), "contacts.db"), "--source", source, "--sync", "never", "--json", "contacts", "export"}, &stdout, &stderr); err != nil {
t.Fatalf("contacts export --sync never error = %v stderr=%s", err, stderr.String())
}
if strings.Contains(stdout.String(), `"display_name"`) {
t.Fatalf("contacts export should stay archive-only with --sync never:\n%s", stdout.String())
}
}

func TestBackupCommands(t *testing.T) {
Expand Down
9 changes: 5 additions & 4 deletions internal/cli/control.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ func controlManifest() control.Manifest {
m.Capabilities = []string{"metadata", "doctor", "status", "sync", "search", "backup"}
m.Privacy = control.Privacy{ContainsPrivateMessages: true, ExportsSecrets: false, LocalOnlyScopes: []string{"whatsapp-desktop", "sqlite", "encrypted-git-backup"}}
m.Commands = map[string]control.Command{
"doctor": {Title: "Doctor", Argv: []string{"wacrawl", "--json", "doctor"}, JSON: true},
"status": {Title: "Status", Argv: []string{"wacrawl", "--json", "--sync", "never", "status"}, JSON: true},
"sync": {Title: "Sync", Argv: []string{"wacrawl", "--json", "sync"}, JSON: true, Mutates: true},
"search": {Title: "Search", Argv: []string{"wacrawl", "--json", "--sync", "auto", "search"}, JSON: true},
"doctor": {Title: "Doctor", Argv: []string{"wacrawl", "--json", "doctor"}, JSON: true},
"status": {Title: "Status", Argv: []string{"wacrawl", "--json", "--sync", "never", "status"}, JSON: true},
"sync": {Title: "Sync", Argv: []string{"wacrawl", "--json", "sync"}, JSON: true, Mutates: true},
"search": {Title: "Search", Argv: []string{"wacrawl", "--json", "--sync", "auto", "search"}, JSON: true},
"contact-export": {Title: "Export contacts", Argv: []string{"wacrawl", "--json", "--sync", "never", "contacts", "export"}, JSON: true},
}
return m
}
4 changes: 4 additions & 0 deletions internal/store/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ func (s *Store) ExportAll(ctx context.Context) (SnapshotData, error) {
return SnapshotData{Contacts: contacts, Chats: chats, Groups: groups, Participants: participants, Messages: messages}, nil
}

// Contacts returns whatever s.exportContacts returns for ctx, making that
// unexported query available to callers outside the package.
func (s *Store) Contacts(ctx context.Context) ([]Contact, error) {
	return s.exportContacts(ctx)
}

// ImportSnapshot passes the snapshot's contacts, chats, groups, participants,
// and messages to ReplaceAll, together with the import stats that
// data.ImportStats builds from sourcePath, the store's own path, and
// finishedAt. It returns ReplaceAll's error unchanged.
func (s *Store) ImportSnapshot(ctx context.Context, data SnapshotData, sourcePath string, finishedAt time.Time) error {
	return s.ReplaceAll(ctx, data.ImportStats(sourcePath, s.Path(), finishedAt), data.Contacts, data.Chats, data.Groups, data.Participants, data.Messages)
}
Expand Down