From 280de31369325b0483dc3ee560c87c69861d9592 Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 00:38:30 +0200 Subject: [PATCH 1/8] feat: expose contact export command --- internal/cli/cli.go | 93 +++++++++++++++++++++++++++++++++ internal/cli/cli_test.go | 110 +++++++++++++++++++++++++++++++++++++++ internal/cli/control.go | 9 ++-- internal/store/export.go | 4 ++ 4 files changed, 212 insertions(+), 4 deletions(-) diff --git a/internal/cli/cli.go b/internal/cli/cli.go index efd74ee..940bedb 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -13,6 +13,7 @@ import ( "sort" "strings" "time" + "unicode" "github.com/openclaw/telecrawl/internal/backup" "github.com/openclaw/telecrawl/internal/store" @@ -466,6 +467,9 @@ func (r *runtime) runFolders(args []string) error { } func (r *runtime) runContacts(args []string) error { + if len(args) > 0 && args[0] == "export" { + return r.runContactsExport(args[1:]) + } fs := flag.NewFlagSet("telecrawl contacts", flag.ContinueOnError) fs.SetOutput(io.Discard) limit := fs.Int("limit", 100, "") @@ -484,6 +488,94 @@ func (r *runtime) runContacts(args []string) error { }) } +type contactExport struct { + Contacts []exportedContact `json:"contacts"` +} + +type exportedContact struct { + DisplayName string `json:"display_name"` + PhoneNumbers []string `json:"phone_numbers"` +} + +func (r *runtime) runContactsExport(args []string) error { + fs := flag.NewFlagSet("telecrawl contacts export", flag.ContinueOnError) + fs.SetOutput(io.Discard) + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { + return usageErr(errors.New("contacts export takes no arguments")) + } + return r.withStore(func(st *store.Store) error { + contacts, err := st.ExportContacts(r.ctx) + if err != nil { + return err + } + return r.print(contactExport{Contacts: exportContacts(contacts)}) + }) +} + +func exportContacts(contacts []store.Contact) []exportedContact { + out := make([]exportedContact, 0, len(contacts)) + for _, contact := range contacts { + name := contactDisplayName(contact) + phone := strings.TrimSpace(contact.Phone) + if name == "" || phone == "" { + continue + } + out = append(out, exportedContact{DisplayName: name, PhoneNumbers: []string{phone}}) + } + return out +} + +func contactDisplayName(contact store.Contact) string { + if name := cleanContactName(contact.FullName, contact); name != "" { + return name + } + return cleanContactName(strings.TrimSpace(contact.FirstName+" "+contact.LastName), contact) +} + +func cleanContactName(name string, contact store.Contact) string { + name = strings.TrimSpace(name) + switch { + case name == "": + return "" + case name == strings.TrimSpace(contact.Phone): + return "" + case name == strings.TrimSpace(contact.JID): + return "" + case name == strings.TrimSpace(contact.Username): + return "" + case name == strings.TrimSpace(contact.LID): + return "" + case strings.HasPrefix(name, "@"): + return "" + case looksLikePhone(name): + return "" + default: + return name + } +} + +func looksLikePhone(value string) bool { + value = strings.TrimSpace(value) + if value == "" { + return false + } + digits := 0 + other := 0 + for _, r := range value { + switch { + case unicode.IsDigit(r): + digits++ + case strings.ContainsRune(" +()-.", r): + default: + other++ + } + } + return digits >= 5 && other == 0 +} + func (r *runtime) runTopics(args []string) error { fs := flag.NewFlagSet("telecrawl topics", flag.ContinueOnError) fs.SetOutput(io.Discard) @@ -810,6 +902,7 @@ usage: telecrawl [--json] status telecrawl [--json] folders telecrawl [--json] contacts [--limit N] + telecrawl --json contacts export telecrawl [--json] chats [--limit N] [--unread] [--folder ID] telecrawl [--json] topics --chat ID [--limit N] telecrawl [--json] messages [--chat ID] [--topic ID] [--limit N] [--after DATE] diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 0f8053f..f9b74b4 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -3,6 +3,7 @@ package cli import ( "bytes" "context" + "encoding/json" "os" "path/filepath" "slices" @@ -77,6 +78,115 @@ func TestImportResultForChatFiltersContacts(t *testing.T) { } } +func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { + ctx := context.Background() + db := filepath.Join(t.TempDir(), "telecrawl.db") + st, err := store.Open(ctx, db) + if err != nil { + t.Fatal(err) + } + defer func() { _ = st.Close() }() + contacts := make([]store.Contact, 0, 104) + for i := 0; i < 101; i++ { + contacts = append(contacts, store.Contact{ + JID: "safe-" + string(rune('a'+(i%26))) + "-" + string(rune('a'+((i/26)%26))), + Phone: "+1555010" + strings.Repeat("0", 3-len(string(rune('0'+(i%10))))) + string(rune('0'+(i%10))), + FullName: "Safe Person", + }) + } + contacts = append(contacts, + store.Contact{JID: "first-last", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, + store.Contact{JID: "username-only", Phone: "+15559990002", Username: "handle", FullName: "@handle"}, + store.Contact{JID: "phone-only", Phone: "+15559990003", FullName: "+15559990003"}, + store.Contact{JID: "jid-only", Phone: "+15559990004", FullName: "jid-only"}, + store.Contact{JID: "blank-name", Phone: "+15559990005"}, + store.Contact{JID: "no-phone", FullName: "No Phone"}, + ) + if err := st.ReplaceAll(ctx, store.ImportStats{}, contacts, nil, nil, nil, nil, nil); err != nil { + t.Fatal(err) + } + var out, errOut bytes.Buffer + err = Run(ctx, []string{"--json", "--db", db, "contacts", "export"}, &out, &errOut) + if err != nil { + t.Fatalf("contacts export: %v stderr=%s", err, errOut.String()) + } + var payload struct { + Contacts []struct { + DisplayName string `json:"display_name"` + PhoneNumbers []string `json:"phone_numbers"` + JID string `json:"jid"` + Username string `json:"username"` + } `json:"contacts"` + } + if err := json.Unmarshal(out.Bytes(), &payload); err != nil { + t.Fatalf("json = %s err=%v", out.String(), err) + } + assertContactExportKeys(t, out.Bytes()) + if len(payload.Contacts) != 102 { + t.Fatalf("contacts = %d, want 102", len(payload.Contacts)) + } + var sawFirstLast bool + for _, contact := range payload.Contacts { + if contact.DisplayName == "First Last" { + sawFirstLast = true + } + if contact.DisplayName == "" || len(contact.PhoneNumbers) != 1 { + t.Fatalf("bad contact = %#v", contact) + } + if contact.JID != "" || contact.Username != "" { + t.Fatalf("leaked source fields = %#v", contact) + } + if strings.HasPrefix(contact.DisplayName, "@") || strings.HasPrefix(contact.DisplayName, "+") || contact.DisplayName == "jid-only" { + t.Fatalf("unsafe display name exported: %#v", contact) + } + } + if !sawFirstLast { + t.Fatalf("missing composed first/last name: %#v", payload.Contacts) + } +} + +func assertContactExportKeys(t *testing.T, data []byte) { + t.Helper() + var root map[string]json.RawMessage + if err := json.Unmarshal(data, &root); err != nil { + t.Fatal(err) + } + contactsJSON, ok := root["contacts"] + if !ok || len(root) != 1 { + t.Fatalf("root keys = %#v, want only contacts", root) + } + var contacts []map[string]json.RawMessage + if err := json.Unmarshal(contactsJSON, &contacts); err != nil { + t.Fatal(err) + } + for _, contact := range contacts { + if _, ok := contact["display_name"]; !ok { + t.Fatalf("contact keys = %#v, missing display_name", contact) + } + if _, ok := contact["phone_numbers"]; !ok { + t.Fatalf("contact keys = %#v, missing phone_numbers", contact) + } + if len(contact) != 2 { + t.Fatalf("contact keys = %#v, want only display_name and phone_numbers", contact) + } + } +} + +func TestMetadataAdvertisesContactExport(t *testing.T) { + manifest := controlManifest() + command, ok := manifest.Commands["contact-export"] + if !ok { + t.Fatalf("commands = %#v", manifest.Commands) + } + if command.Mutates || !command.JSON { + t.Fatalf("contact-export command = %#v", command) + } + want := []string{"telecrawl", "--json", "contacts", "export"} + if !slices.Equal(command.Argv, want) { + t.Fatalf("argv = %#v, want %#v", command.Argv, want) + } +} + func TestStoreImportResultPreservesArchivedMediaOnReimport(t *testing.T) { ctx := context.Background() st, err := store.Open(ctx, filepath.Join(t.TempDir(), "telecrawl.db")) diff --git a/internal/cli/control.go b/internal/cli/control.go index d88c1e5..e3117ce 100644 --- a/internal/cli/control.go +++ b/internal/cli/control.go @@ -19,10 +19,11 @@ func controlManifest() control.Manifest { m.Capabilities = []string{"metadata", "doctor", "status", "sync", "search", "backup"} m.Privacy = control.Privacy{ContainsPrivateMessages: true, ExportsSecrets: false, LocalOnlyScopes: []string{"telegram-desktop", "telegram-macos-postbox", "sqlite", "encrypted-git-backup"}} m.Commands = map[string]control.Command{ - "doctor": {Title: "Doctor", Argv: []string{"telecrawl", "--json", "doctor"}, JSON: true}, - "status": {Title: "Status", Argv: []string{"telecrawl", "--json", "status"}, JSON: true}, - "sync": {Title: "Import", Argv: []string{"telecrawl", "--json", "import"}, JSON: true, Mutates: true}, - "search": {Title: "Search", Argv: []string{"telecrawl", "--json", "search"}, JSON: true}, + "doctor": {Title: "Doctor", Argv: []string{"telecrawl", "--json", "doctor"}, JSON: true}, + "status": {Title: "Status", Argv: []string{"telecrawl", "--json", "status"}, JSON: true}, + "sync": {Title: "Import", Argv: []string{"telecrawl", "--json", "import"}, JSON: true, Mutates: true}, + "search": {Title: "Search", Argv: []string{"telecrawl", "--json", "search"}, JSON: true}, + "contact-export": {Title: "Export contacts", Argv: []string{"telecrawl", "--json", "contacts", "export"}, JSON: true}, } return m } diff --git a/internal/store/export.go b/internal/store/export.go index a3ea78b..f230dee 100644 --- a/internal/store/export.go +++ b/internal/store/export.go @@ -79,6 +79,10 @@ func (s *Store) ListContacts(ctx context.Context, limit int) ([]Contact, error) return s.contacts(ctx, limit) } +func (s *Store) ExportContacts(ctx context.Context) ([]Contact, error) { + return s.allContacts(ctx) +} + func (s *Store) allContacts(ctx context.Context) ([]Contact, error) { return s.contacts(ctx, 0) } From e751fb99b8234e99d27f0c31d71fab7f5e40de44 Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 12:56:21 +0200 Subject: [PATCH 2/8] docs: clarify contacts export usage --- internal/cli/cli.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 940bedb..000f0d9 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -902,7 +902,7 @@ usage: telecrawl [--json] status telecrawl [--json] folders telecrawl [--json] contacts [--limit N] - telecrawl --json contacts export + telecrawl [--json] contacts export telecrawl [--json] chats [--limit N] [--unread] [--folder ID] telecrawl [--json] topics --chat ID [--limit N] telecrawl [--json] messages [--chat ID] [--topic ID] [--limit N] [--after DATE] From c21ef4b51f3ed280887c7df5c4fb0873856f33ba Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 19:00:29 +0200 Subject: [PATCH 3/8] fix: filter unsafe Telegram contact exports --- internal/cli/cli.go | 37 ++++++++++++++++++++++++++++++++----- internal/cli/cli_test.go | 8 +++++++- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 000f0d9..3d29366 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -519,7 +519,7 @@ func exportContacts(contacts []store.Contact) []exportedContact { out := make([]exportedContact, 0, len(contacts)) for _, contact := range contacts { name := contactDisplayName(contact) - phone := strings.TrimSpace(contact.Phone) + phone := contactPhoneNumber(contact.Phone) if name == "" || phone == "" { continue } @@ -540,13 +540,13 @@ func cleanContactName(name string, contact store.Contact) string { switch { case name == "": return "" - case name == strings.TrimSpace(contact.Phone): + case sameContactText(name, contact.Phone): return "" - case name == strings.TrimSpace(contact.JID): + case sameContactText(name, contact.JID): return "" - case name == strings.TrimSpace(contact.Username): + case sameContactText(name, contact.Username): return "" - case name == strings.TrimSpace(contact.LID): + case sameContactText(name, contact.LID): return "" case strings.HasPrefix(name, "@"): return "" @@ -557,6 +557,33 @@ func cleanContactName(name string, contact store.Contact) string { } } +func sameContactText(a, b string) bool { + a = strings.TrimSpace(a) + b = strings.TrimSpace(b) + return a != "" && b != "" && strings.EqualFold(a, b) +} + +func contactPhoneNumber(value string) string { + value = strings.TrimSpace(value) + if value == "" { + return "" + } + digits := 0 + for _, r := range value { + switch { + case unicode.IsDigit(r): + digits++ + case strings.ContainsRune(" +()-.", r): + default: + return "" + } + } + if digits < 7 || digits > 15 { + return "" + } + return value +} + func looksLikePhone(value string) bool { value = strings.TrimSpace(value) if value == "" { diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index f9b74b4..6473e5b 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -94,13 +94,16 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { FullName: "Safe Person", }) } - contacts = append(contacts, + contacts = append( + contacts, store.Contact{JID: "first-last", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, store.Contact{JID: "username-only", Phone: "+15559990002", Username: "handle", FullName: "@handle"}, + store.Contact{JID: "bare-username-only", Phone: "+15559990006", Username: "handle", FullName: "Handle"}, store.Contact{JID: "phone-only", Phone: "+15559990003", FullName: "+15559990003"}, store.Contact{JID: "jid-only", Phone: "+15559990004", FullName: "jid-only"}, store.Contact{JID: "blank-name", Phone: "+15559990005"}, store.Contact{JID: "no-phone", FullName: "No Phone"}, + store.Contact{JID: "short-code", Phone: "42777", FullName: "Telegram"}, ) if err := st.ReplaceAll(ctx, store.ImportStats{}, contacts, nil, nil, nil, nil, nil); err != nil { t.Fatal(err) @@ -139,6 +142,9 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { if strings.HasPrefix(contact.DisplayName, "@") || strings.HasPrefix(contact.DisplayName, "+") || contact.DisplayName == "jid-only" { t.Fatalf("unsafe display name exported: %#v", contact) } + if contact.DisplayName == "Handle" || contact.PhoneNumbers[0] == "42777" { + t.Fatalf("unsafe contact exported: %#v", contact) + } } if !sawFirstLast { t.Fatalf("missing composed first/last name: %#v", payload.Contacts) From 23fc1459db256f9a2cdf503351f03c4bfd562599 Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 20:39:13 +0200 Subject: [PATCH 4/8] fix: narrow Telegram service contact filter --- internal/cli/cli.go | 30 ++++++++++-------------------- internal/cli/cli_test.go | 15 +++++++++++---- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 3d29366..6ae1054 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -518,8 +518,11 @@ func (r *runtime) runContactsExport(args []string) error { func exportContacts(contacts []store.Contact) []exportedContact { out := make([]exportedContact, 0, len(contacts)) for _, contact := range contacts { + if isTelegramServiceContact(contact) { + continue + } name := contactDisplayName(contact) - phone := contactPhoneNumber(contact.Phone) + phone := strings.TrimSpace(contact.Phone) if name == "" || phone == "" { continue } @@ -563,25 +566,12 @@ func sameContactText(a, b string) bool { return a != "" && b != "" && strings.EqualFold(a, b) } -func contactPhoneNumber(value string) string { - value = strings.TrimSpace(value) - if value == "" { - return "" - } - digits := 0 - for _, r := range value { - switch { - case unicode.IsDigit(r): - digits++ - case strings.ContainsRune(" +()-.", r): - default: - return "" - } - } - if digits < 7 || digits > 15 { - return "" - } - return value +func isTelegramServiceContact(contact store.Contact) bool { + return strings.TrimSpace(contact.Phone) == "42777" && + sameContactText(contact.FullName, "Telegram") && + sameContactText(contact.FirstName, "Telegram") && + strings.TrimSpace(contact.LastName) == "" && + strings.TrimSpace(contact.Username) == "" } func looksLikePhone(value string) bool { diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 6473e5b..2527cdd 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -103,7 +103,8 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { store.Contact{JID: "jid-only", Phone: "+15559990004", FullName: "jid-only"}, store.Contact{JID: "blank-name", Phone: "+15559990005"}, store.Contact{JID: "no-phone", FullName: "No Phone"}, - store.Contact{JID: "short-code", Phone: "42777", FullName: "Telegram"}, + store.Contact{JID: "short-phone-person", Phone: "12345", FullName: "Short Phone Person"}, + store.Contact{JID: "telegram-service", Phone: "42777", FullName: "Telegram", FirstName: "Telegram"}, ) if err := st.ReplaceAll(ctx, store.ImportStats{}, contacts, nil, nil, nil, nil, nil); err != nil { t.Fatal(err) @@ -125,14 +126,17 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { t.Fatalf("json = %s err=%v", out.String(), err) } assertContactExportKeys(t, out.Bytes()) - if len(payload.Contacts) != 102 { - t.Fatalf("contacts = %d, want 102", len(payload.Contacts)) + if len(payload.Contacts) != 103 { + t.Fatalf("contacts = %d, want 103", len(payload.Contacts)) } - var sawFirstLast bool + var sawFirstLast, sawShortPhonePerson bool for _, contact := range payload.Contacts { if contact.DisplayName == "First Last" { sawFirstLast = true } + if contact.DisplayName == "Short Phone Person" && contact.PhoneNumbers[0] == "12345" { + sawShortPhonePerson = true + } if contact.DisplayName == "" || len(contact.PhoneNumbers) != 1 { t.Fatalf("bad contact = %#v", contact) } @@ -149,6 +153,9 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { if !sawFirstLast { t.Fatalf("missing composed first/last name: %#v", payload.Contacts) } + if !sawShortPhonePerson { + t.Fatalf("missing short phone person: %#v", payload.Contacts) + } } func assertContactExportKeys(t *testing.T, data []byte) { From 5b74dd362687a2a687ef1d89b5fa80c82d6989b0 Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 21:33:40 +0200 Subject: [PATCH 5/8] fix: export only conversation-backed Telegram contacts What: - narrow contact-export to Telegram contacts with chat or message evidence - suppress exact duplicate display-name and phone rows - cover stale peer exclusion and exact duplicate suppression in tests Why: - keep clawdex from importing stale Telegram peer records as canonical people - preserve the simple contact-export contract without adding graph or candidate fields Tests: - git diff --check (pass) - nix shell nixpkgs#go --command go test ./... (pass) - nix shell nixpkgs#go --command go vet ./... (pass) - nix shell nixpkgs#go --command go build ./cmd/telecrawl (pass) --- internal/cli/cli.go | 6 +++++ internal/cli/cli_test.go | 58 ++++++++++++++++++++++++++++------------ internal/store/export.go | 11 +++++++- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 6ae1054..99aca21 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -517,6 +517,7 @@ func (r *runtime) runContactsExport(args []string) error { func exportContacts(contacts []store.Contact) []exportedContact { out := make([]exportedContact, 0, len(contacts)) + seen := map[string]struct{}{} for _, contact := range contacts { if isTelegramServiceContact(contact) { continue @@ -526,6 +527,11 @@ func exportContacts(contacts []store.Contact) []exportedContact { if name == "" || phone == "" { continue } + key := name + "\x00" + phone + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} out = append(out, exportedContact{DisplayName: name, PhoneNumbers: []string{phone}}) } return out diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 2527cdd..405663f 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "os" "path/filepath" "slices" @@ -87,26 +88,39 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { } defer func() { _ = st.Close() }() contacts := make([]store.Contact, 0, 104) + messages := make([]store.Message, 0, 104) + addContact := func(contact store.Contact, withEvidence bool) { + contacts = append(contacts, contact) + if !withEvidence { + return + } + messages = append(messages, store.Message{ + SourcePK: int64(len(messages) + 1), + ChatJID: contact.JID, + MessageID: fmt.Sprintf("msg-%d", len(messages)+1), + Timestamp: time.Date(2026, 6, 5, 12, 0, 0, 0, time.UTC), + Text: "contact evidence", + }) + } for i := 0; i < 101; i++ { - contacts = append(contacts, store.Contact{ + addContact(store.Contact{ JID: "safe-" + string(rune('a'+(i%26))) + "-" + string(rune('a'+((i/26)%26))), - Phone: "+1555010" + strings.Repeat("0", 3-len(string(rune('0'+(i%10))))) + string(rune('0'+(i%10))), + Phone: fmt.Sprintf("+155501%05d", i), FullName: "Safe Person", - }) - } - contacts = append( - contacts, - store.Contact{JID: "first-last", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, - store.Contact{JID: "username-only", Phone: "+15559990002", Username: "handle", FullName: "@handle"}, - store.Contact{JID: "bare-username-only", Phone: "+15559990006", Username: "handle", FullName: "Handle"}, - store.Contact{JID: "phone-only", Phone: "+15559990003", FullName: "+15559990003"}, - store.Contact{JID: "jid-only", Phone: "+15559990004", FullName: "jid-only"}, - store.Contact{JID: "blank-name", Phone: "+15559990005"}, - store.Contact{JID: "no-phone", FullName: "No Phone"}, - store.Contact{JID: "short-phone-person", Phone: "12345", FullName: "Short Phone Person"}, - store.Contact{JID: "telegram-service", Phone: "42777", FullName: "Telegram", FirstName: "Telegram"}, - ) - if err := st.ReplaceAll(ctx, store.ImportStats{}, contacts, nil, nil, nil, nil, nil); err != nil { + }, true) + } + addContact(store.Contact{JID: "first-last", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, true) + addContact(store.Contact{JID: "first-last-duplicate", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, true) + addContact(store.Contact{JID: "username-only", Phone: "+15559990002", Username: "handle", FullName: "@handle"}, true) + addContact(store.Contact{JID: "bare-username-only", Phone: "+15559990006", Username: "handle", FullName: "Handle"}, true) + addContact(store.Contact{JID: "phone-only", Phone: "+15559990003", FullName: "+15559990003"}, true) + addContact(store.Contact{JID: "jid-only", Phone: "+15559990004", FullName: "jid-only"}, true) + addContact(store.Contact{JID: "blank-name", Phone: "+15559990005"}, true) + addContact(store.Contact{JID: "no-phone", FullName: "No Phone"}, true) + addContact(store.Contact{JID: "short-phone-person", Phone: "12345", FullName: "Short Phone Person"}, true) + addContact(store.Contact{JID: "telegram-service", Phone: "42777", FullName: "Telegram", FirstName: "Telegram"}, true) + addContact(store.Contact{JID: "stale-peer", Phone: "+15559990007", FullName: "Stale Peer"}, false) + if err := st.ReplaceAll(ctx, store.ImportStats{}, contacts, nil, nil, nil, nil, messages); err != nil { t.Fatal(err) } var out, errOut bytes.Buffer @@ -130,9 +144,13 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { t.Fatalf("contacts = %d, want 103", len(payload.Contacts)) } var sawFirstLast, sawShortPhonePerson bool + firstLastCount := 0 for _, contact := range payload.Contacts { if contact.DisplayName == "First Last" { sawFirstLast = true + if contact.PhoneNumbers[0] == "+15559990001" { + firstLastCount++ + } } if contact.DisplayName == "Short Phone Person" && contact.PhoneNumbers[0] == "12345" { sawShortPhonePerson = true @@ -149,10 +167,16 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { if contact.DisplayName == "Handle" || contact.PhoneNumbers[0] == "42777" { t.Fatalf("unsafe contact exported: %#v", contact) } + if contact.DisplayName == "Stale Peer" { + t.Fatalf("stale contact without conversation evidence exported: %#v", contact) + } } if !sawFirstLast { t.Fatalf("missing composed first/last name: %#v", payload.Contacts) } + if firstLastCount != 1 { + t.Fatalf("first/last duplicate count = %d, want 1", firstLastCount) + } if !sawShortPhonePerson { t.Fatalf("missing short phone person: %#v", payload.Contacts) } diff --git a/internal/store/export.go b/internal/store/export.go index f230dee..3cf11d2 100644 --- a/internal/store/export.go +++ b/internal/store/export.go @@ -80,7 +80,12 @@ func (s *Store) ListContacts(ctx context.Context, limit int) ([]Contact, error) } func (s *Store) ExportContacts(ctx context.Context) ([]Contact, error) { - return s.allContacts(ctx) + query := `select jid,coalesce(peer_type,''),coalesce(phone,''),coalesce(full_name,''),coalesce(first_name,''),coalesce(last_name,''),coalesce(business_name,''),coalesce(username,''),coalesce(lid,''),coalesce(about_text,''),coalesce(avatar_path,''),coalesce(updated_at,0) +from contacts c +where exists (select 1 from chats ch where cast(ch.id as text)=c.jid) + or exists (select 1 from messages m where m.chat_jid=c.jid or m.sender_jid=c.jid) +order by jid` + return s.queryContacts(ctx, query, nil) } func (s *Store) allContacts(ctx context.Context) ([]Contact, error) { @@ -94,6 +99,10 @@ func (s *Store) contacts(ctx context.Context, limit int) ([]Contact, error) { query += " limit ?" args = append(args, limit) } + return s.queryContacts(ctx, query, args) +} + +func (s *Store) queryContacts(ctx context.Context, query string, args []any) ([]Contact, error) { rows, err := s.db.QueryContext(ctx, query, args...) if err != nil { return nil, err From e262056a8ea900277834902a8d1f3ecf25b84633 Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 21:39:46 +0200 Subject: [PATCH 6/8] test: harden fake importer execution What: - probe the temporary fake Python helper before importer tests use it - retry briefly when the OS reports the helper script is still text file busy Why: - GitHub CI hit `fork/exec .../python: text file busy` in the importer test fixture - the failure is in the test helper, not the contact-export implementation Tests: - nix shell nixpkgs#go --command sh -c 'GOTOOLCHAIN=local go test -count=1 ./... -coverprofile=coverage.out' (passed) - nix shell nixpkgs#go --command go test ./... (passed) - nix shell nixpkgs#go --command go vet ./... (passed) - nix shell nixpkgs#go --command go build ./cmd/telecrawl (passed) - git diff --check (passed) --- internal/telegramdesktop/importer_test.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/internal/telegramdesktop/importer_test.go b/internal/telegramdesktop/importer_test.go index a3fe52d..9bed277 100644 --- a/internal/telegramdesktop/importer_test.go +++ b/internal/telegramdesktop/importer_test.go @@ -334,13 +334,29 @@ func fakePythonImporter(t *testing.T) (python string, argvPath string) { argvPath = filepath.Join(dir, "argv") python = filepath.Join(dir, "python") result := `{"source_path":"fixture","started_at":"2026-01-01T00:00:00Z","finished_at":"2026-01-01T00:00:00Z","chats":[],"folders":[],"folder_chats":[],"topics":[],"messages":[]}` - body := fmt.Sprintf("#!/bin/sh\nprintf '%%s\\n' \"$@\" > %q\nprintf '%%s\\n' '%s'\n", argvPath, result) + body := fmt.Sprintf("#!/bin/sh\nif [ \"$1\" = \"--probe\" ]; then exit 0; fi\nprintf '%%s\\n' \"$@\" > %q\nprintf '%%s\\n' '%s'\n", argvPath, result) if err := os.WriteFile(python, []byte(body), 0o700); err != nil { t.Fatal(err) } + waitForFakePython(t, python) return python, argvPath } +func waitForFakePython(t *testing.T, python string) { + t.Helper() + for range 20 { + err := exec.Command(python, "--probe").Run() // #nosec G204 -- test executes its own temporary helper. + if err == nil { + return + } + if !strings.Contains(err.Error(), "text file busy") { + t.Fatal(err) + } + time.Sleep(10 * time.Millisecond) + } + t.Fatalf("fake python %s remained text file busy", python) +} + func readImporterArgs(t *testing.T, path string) []string { t.Helper() data, err := os.ReadFile(path) From 621de09047e156c592b37f9507d6fc8c636747f0 Mon Sep 17 00:00:00 2001 From: joshp123 Date: Fri, 5 Jun 2026 22:49:25 +0200 Subject: [PATCH 7/8] fix: collapse Telegram contact exports by phone What: - group contact-export rows by trimmed phone number - prefer the newest source contact name when updated_at is meaningful - fall back to the longer cleaned display name when timestamps tie or are absent - cover newer-name and equal-time richer-name duplicate cases in the export test Why: - Telegram Postbox archives can contain multiple source peer rows for the same phone - clawdex should receive one contact per phone for v0 instead of duplicate source-row names - usernames remain out of the v0 contract and out of display_name fallback behavior Tests: - nix shell nixpkgs#go -c go test ./internal/cli -run TestContactsExportUsesContractShapeAndSkipsUnsafeNames -count=1: pass - nix shell nixpkgs#go -c go test ./...: pass - nix shell nixpkgs#go -c go vet ./...: pass - nix shell nixpkgs#go -c go build ./cmd/telecrawl: pass - git diff --check: pass - copied-real-DB smoke with clawdex pull import: first import created 51 people, repeat import returned [] --- internal/cli/cli.go | 28 +++++++++++++++++++++++----- internal/cli/cli_test.go | 25 ++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 99aca21..3bf0eb6 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -517,7 +517,8 @@ func (r *runtime) runContactsExport(args []string) error { func exportContacts(contacts []store.Contact) []exportedContact { out := make([]exportedContact, 0, len(contacts)) - seen := map[string]struct{}{} + byPhone := map[string]store.Contact{} + phoneOrder := make([]string, 0, len(contacts)) for _, contact := range contacts { if isTelegramServiceContact(contact) { continue @@ -527,16 +528,33 @@ func exportContacts(contacts []store.Contact) []exportedContact { if name == "" || phone == "" { continue } - key := name + "\x00" + phone - if _, ok := seen[key]; ok { - continue + if current, ok := byPhone[phone]; ok { + if preferContactExportName(contact, current) { + byPhone[phone] = contact + } + } else { + byPhone[phone] = contact + phoneOrder = append(phoneOrder, phone) } - seen[key] = struct{}{} + } + for _, phone := range phoneOrder { + contact := byPhone[phone] + name := contactDisplayName(contact) out = append(out, exportedContact{DisplayName: name, PhoneNumbers: []string{phone}}) } return out } +func preferContactExportName(candidate, current store.Contact) bool { + if candidate.UpdatedAt.After(current.UpdatedAt) { + return true + } + if current.UpdatedAt.After(candidate.UpdatedAt) { + return false + } + return len([]rune(contactDisplayName(candidate))) > len([]rune(contactDisplayName(current))) +} + func contactDisplayName(contact store.Contact) string { if name := cleanContactName(contact.FullName, contact); name != "" { return name diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 405663f..84c1f91 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -111,6 +111,10 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { } addContact(store.Contact{JID: "first-last", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, true) addContact(store.Contact{JID: "first-last-duplicate", Phone: "+15559990001", FirstName: "First", LastName: "Last"}, true) + addContact(store.Contact{JID: "recent-short", Phone: "+15559990008", FullName: "Recent", UpdatedAt: time.Unix(200, 0).UTC()}, true) + addContact(store.Contact{JID: "older-richer", Phone: "+15559990008", FullName: "Older Richer Name", UpdatedAt: time.Unix(100, 0).UTC()}, true) + addContact(store.Contact{JID: "equal-short", Phone: "+15559990009", FullName: "Pim"}, true) + addContact(store.Contact{JID: "equal-richer", Phone: "+15559990009", FullName: "Pim van den Berg"}, true) addContact(store.Contact{JID: "username-only", Phone: "+15559990002", Username: "handle", FullName: "@handle"}, true) addContact(store.Contact{JID: "bare-username-only", Phone: "+15559990006", Username: "handle", FullName: "Handle"}, true) addContact(store.Contact{JID: "phone-only", Phone: "+15559990003", FullName: "+15559990003"}, true) @@ -140,10 +144,10 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { t.Fatalf("json = %s err=%v", out.String(), err) } assertContactExportKeys(t, out.Bytes()) - if len(payload.Contacts) != 103 { - t.Fatalf("contacts = %d, want 103", len(payload.Contacts)) + if len(payload.Contacts) != 105 { + t.Fatalf("contacts = %d, want 105", len(payload.Contacts)) } - var sawFirstLast, sawShortPhonePerson bool + var sawFirstLast, sawShortPhonePerson, sawRecent, sawRicherEqual bool firstLastCount := 0 for _, contact := range payload.Contacts { if contact.DisplayName == "First Last" { @@ -152,6 +156,12 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { firstLastCount++ } } + if contact.DisplayName == "Recent" && contact.PhoneNumbers[0] == "+15559990008" { + sawRecent = true + } + if contact.DisplayName == "Pim van den Berg" && contact.PhoneNumbers[0] == "+15559990009" { + sawRicherEqual = true + } if contact.DisplayName == "Short Phone Person" && contact.PhoneNumbers[0] == "12345" { sawShortPhonePerson = true } @@ -170,6 +180,9 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { if contact.DisplayName == "Stale Peer" { t.Fatalf("stale contact without conversation evidence exported: %#v", contact) } + if contact.DisplayName == "Older Richer Name" || contact.DisplayName == "Pim" { + t.Fatalf("wrong duplicate contact name exported: %#v", contact) + } } if !sawFirstLast { t.Fatalf("missing composed first/last name: %#v", payload.Contacts) @@ -180,6 +193,12 @@ func TestContactsExportUsesContractShapeAndSkipsUnsafeNames(t *testing.T) { if !sawShortPhonePerson { t.Fatalf("missing short phone person: %#v", payload.Contacts) } + if !sawRecent { + t.Fatalf("missing newer duplicate contact name: %#v", payload.Contacts) + } + if !sawRicherEqual { + t.Fatalf("missing richer equal-time contact name: %#v", payload.Contacts) + } } func assertContactExportKeys(t *testing.T, data []byte) { From b79e37f64c5ea8cde0b422858c4664f2c0793f10 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 6 Jun 2026 17:44:45 -0700 Subject: [PATCH 8/8] fix: handle older Telegram archive schemas --- CHANGELOG.md | 5 ++ internal/store/schema.go | 14 +++-- internal/store/store.go | 8 ++- internal/store/store_test.go | 107 +++++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afa5025..5e2dcb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ ### Added - Archive Telegram contact records from local Postbox imports. (#7; thanks @joshp123) +- Expose Telegram contacts through the crawlkit `contact-export` metadata command for Clawdex imports. (#9; thanks @joshp123) + +### Fixed + +- Migrate older local archives before creating topic indexes and tolerate nullable optional message fields from live Telegram data. ## [0.2.0] - 2026-05-31 diff --git a/internal/store/schema.go b/internal/store/schema.go index 980b538..ef1feba 100644 --- a/internal/store/schema.go +++ b/internal/store/schema.go @@ -113,6 +113,14 @@ create table if not exists messages ( pinned integer not null default 0 ); +create table if not exists sync_state ( + key text primary key, + value text not null, + updated_at integer not null +); +` + +const indexSQL = ` create index if not exists idx_messages_chat_ts on messages(chat_jid, ts); create index if not exists idx_messages_chat_msg on messages(chat_jid, msg_id); create index if not exists idx_messages_chat_topic_ts on messages(chat_jid, topic_id, ts); @@ -122,10 +130,4 @@ create index if not exists idx_messages_ts on messages(ts); create index if not exists idx_messages_sender on messages(sender_jid); create virtual table if not exists messages_fts using fts5(text, chat, sender, media); - -create table if not exists sync_state ( - key text primary key, - value text not null, - updated_at integer not null -); ` diff --git a/internal/store/store.go b/internal/store/store.go index 4a1a536..d5e79e5 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -203,6 +203,10 @@ func Open(ctx context.Context, path string) (*Store, error) { _ = db.Close() return nil, err } + if _, err := db.ExecContext(ctx, indexSQL); err != nil { + _ = db.Close() + return nil, err + } if _, err := db.ExecContext(ctx, fmt.Sprintf("pragma user_version = %d", schemaVersion)); err != nil { _ = db.Close() return nil, err @@ -525,11 +529,11 @@ func (s *Store) messages(ctx context.Context, filter MessageFilter, search bool) if filter.Limit <= 0 { filter.Limit = 50 } - query := `select source_pk,chat_jid,chat_name,msg_id,sender_jid,sender_name,ts,edit_ts,from_me,text,raw_type,message_type,media_type,media_title,media_path,media_url,media_size,coalesce(metadata_type,''),coalesce(metadata_title,''),coalesce(metadata_url,''),coalesce(metadata_json,''),starred,topic_id,reply_to_msg_id,reply_to_chat_jid,thread_id,forward_json,reactions_json,views,forwards,replies_count,pinned,'' from messages where 1=1` + query := `select source_pk,chat_jid,coalesce(chat_name,''),msg_id,coalesce(sender_jid,''),coalesce(sender_name,''),ts,coalesce(edit_ts,0),from_me,coalesce(text,''),raw_type,coalesce(message_type,''),coalesce(media_type,''),coalesce(media_title,''),coalesce(media_path,''),coalesce(media_url,''),coalesce(media_size,0),coalesce(metadata_type,''),coalesce(metadata_title,''),coalesce(metadata_url,''),coalesce(metadata_json,''),starred,coalesce(topic_id,''),coalesce(reply_to_msg_id,''),coalesce(reply_to_chat_jid,''),coalesce(thread_id,''),coalesce(forward_json,''),coalesce(reactions_json,''),coalesce(views,0),coalesce(forwards,0),coalesce(replies_count,0),coalesce(pinned,0),'' from messages where 1=1` args := []any{} prefix := "" if search { - query = `select m.source_pk,m.chat_jid,m.chat_name,m.msg_id,m.sender_jid,m.sender_name,m.ts,m.edit_ts,m.from_me,m.text,m.raw_type,m.message_type,m.media_type,m.media_title,m.media_path,m.media_url,m.media_size,coalesce(m.metadata_type,''),coalesce(m.metadata_title,''),coalesce(m.metadata_url,''),coalesce(m.metadata_json,''),m.starred,m.topic_id,m.reply_to_msg_id,m.reply_to_chat_jid,m.thread_id,m.forward_json,m.reactions_json,m.views,m.forwards,m.replies_count,m.pinned,snippet(messages_fts,0,'[',']','...',12) from messages_fts f join messages m on m.rowid=f.rowid where messages_fts match ?` + query = `select m.source_pk,m.chat_jid,coalesce(m.chat_name,''),m.msg_id,coalesce(m.sender_jid,''),coalesce(m.sender_name,''),m.ts,coalesce(m.edit_ts,0),m.from_me,coalesce(m.text,''),m.raw_type,coalesce(m.message_type,''),coalesce(m.media_type,''),coalesce(m.media_title,''),coalesce(m.media_path,''),coalesce(m.media_url,''),coalesce(m.media_size,0),coalesce(m.metadata_type,''),coalesce(m.metadata_title,''),coalesce(m.metadata_url,''),coalesce(m.metadata_json,''),m.starred,coalesce(m.topic_id,''),coalesce(m.reply_to_msg_id,''),coalesce(m.reply_to_chat_jid,''),coalesce(m.thread_id,''),coalesce(m.forward_json,''),coalesce(m.reactions_json,''),coalesce(m.views,0),coalesce(m.forwards,0),coalesce(m.replies_count,0),coalesce(m.pinned,0),snippet(messages_fts,0,'[',']','...',12) from messages_fts f join messages m on m.rowid=f.rowid where messages_fts match ?` args = append(args, filter.Query) prefix = "m." } diff --git a/internal/store/store_test.go b/internal/store/store_test.go index eedae1a..743c779 100644 --- a/internal/store/store_test.go +++ b/internal/store/store_test.go @@ -228,6 +228,113 @@ pragma user_version = 2; } } +func TestOpenMigratesSchema1BeforeCreatingTopicIndex(t *testing.T) { + t.Parallel() + ctx := context.Background() + path := filepath.Join(t.TempDir(), "schema1.db") + db, err := sql.Open("sqlite", path) + if err != nil { + t.Fatal(err) + } + if _, err := db.ExecContext(ctx, ` +create table chats ( + id integer primary key, + kind text not null, + name text, + username text, + last_message_at integer, + unread_count integer not null default 0, + message_count integer not null default 0 +); +create table contacts ( + jid text primary key, + phone text, + full_name text, + first_name text, + last_name text, + business_name text, + username text, + lid text, + about_text text, + updated_at integer +); +create table messages ( + rowid integer primary key autoincrement, + source_pk integer not null unique, + chat_jid text not null, + chat_name text, + msg_id text not null, + sender_jid text, + sender_name text, + ts integer not null, + from_me integer not null, + text text, + raw_type integer not null default 0, + message_type text, + media_type text, + media_title text, + media_path text, + media_url text, + media_size integer, + starred integer not null default 0 +); +create index idx_messages_chat_ts on messages(chat_jid, ts); +pragma user_version = 1; +`); err != nil { + _ = db.Close() + t.Fatal(err) + } + if err := db.Close(); err != nil { + t.Fatal(err) + } + + st := openTestStore(t, path) + cols, err := columns(ctx, st.db, "messages") + if err != nil { + t.Fatal(err) + } + if !cols["topic_id"] { + t.Fatal("missing migrated topic_id column") + } + var indexName string + if err := st.db.QueryRowContext(ctx, `select name from sqlite_master where type='index' and name='idx_messages_chat_topic_ts'`).Scan(&indexName); err != nil { + t.Fatal(err) + } + if indexName != "idx_messages_chat_topic_ts" { + t.Fatalf("topic index = %q", indexName) + } + var version int + if err := st.db.QueryRowContext(ctx, "pragma user_version").Scan(&version); err != nil { + t.Fatal(err) + } + if version != schemaVersion { + t.Fatalf("user_version = %d, want %d", version, schemaVersion) + } +} + +func TestMessagesToleratesNullableOptionalFields(t *testing.T) { + t.Parallel() + ctx := context.Background() + st := openTestStore(t, filepath.Join(t.TempDir(), "nullable-messages.db")) + if _, err := st.db.ExecContext(ctx, `insert into messages(source_pk,chat_jid,msg_id,ts,from_me,raw_type,starred) values(?,?,?,?,?,?,?)`, 1, "42", "1", unix(time.Date(2026, 6, 6, 12, 0, 0, 0, time.UTC)), 0, 0, 0); err != nil { + t.Fatal(err) + } + + messages, err := st.Messages(ctx, MessageFilter{ChatJID: "42", Limit: 10}) + if err != nil { + t.Fatal(err) + } + if len(messages) != 1 { + t.Fatalf("messages = %d, want 1", len(messages)) + } + if messages[0].EditTime.IsZero() == false { + t.Fatalf("edit time = %v, want zero", messages[0].EditTime) + } + if messages[0].ChatName != "" || messages[0].TopicID != "" || messages[0].ForwardJSON != "" { + t.Fatalf("nullable fields not normalized: %#v", messages[0]) + } +} + func TestUpsertChatPreservesUnrelatedChats(t *testing.T) { t.Parallel() ctx := context.Background()