Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/telecrawl
coverage.out
dist/
__pycache__/
*.pyc
.DS_Store
8 changes: 5 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Changelog

All notable changes to this project are documented here.
## [0.2.1] - Unreleased

### Added

The format follows Keep a Changelog, and this project uses Semantic Versioning.
- Archive Telegram contact records from local Postbox imports. (#7; thanks @joshp123)

## [0.2.1] - Unreleased
## [0.2.0] - 2026-05-31

### Added

Expand Down
29 changes: 19 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ It is local-first:

- Normal archive/search commands do not upload data.
- `backup push` uploads only age-encrypted shards when you run it explicitly.
- Telegram message text, chat names, sender names, and media metadata stay inside
- Telegram message text, chat names, sender names, contact phone numbers,
contact usernames, avatar path metadata, and media metadata stay inside
encrypted backup payloads.

## Install
Expand Down Expand Up @@ -89,9 +90,12 @@ fetch is attempted, so `--fetch-media` only tries media that is not already in
the local archive.

Native Postbox can tag link previews, polls, geo/live-geo, service messages, or
deleted messages as broad media candidates. `telecrawl` tries those during
`--fetch-media`, but only keeps them as media rows when Telegram returns a
downloadable file.
deleted messages as broad media candidates. `telecrawl` archives their decoded
message metadata separately from binary media, and only keeps them as media rows
when Telegram returns a downloadable file.
`metadata_json` is a local source-native Postbox payload for later rendering or
search; it is not a cross-source schema and can contain private Telegram
metadata.

When no `--source` is provided on macOS, `telecrawl` checks Telegram Desktop
`tdata` first, then the native Telegram for macOS group container. No backend
Expand All @@ -103,14 +107,18 @@ telecrawl import --path "$HOME/Library/Group Containers/6N38VWS5BX.ru.keepcoder.

Native macOS imports include every local `account-*` database they find; if more
than one account is present, stored chat and sender IDs are account-scoped to
avoid collisions. They archive cached media by default. `--fetch-media` also uses
the existing native Telegram session to fetch missing cloud media when account
auth data is present; this does not launch Telegram or start a login/2FA flow.
avoid collisions. They archive cached media by default and store Telegram peer
records as contacts for message enrichment. Contacts can include phone numbers,
usernames, and archived avatar paths when those values exist locally, and are
visible through `telecrawl contacts`. `--fetch-media` also uses the existing
native Telegram session to fetch missing cloud media when account auth data is
present; this does not launch Telegram or start a login/2FA flow.

Useful reads:

```bash
telecrawl folders
telecrawl contacts
telecrawl chats --limit 20
telecrawl chats --folder FOLDER_ID
telecrawl chats --unread
Expand Down Expand Up @@ -230,9 +238,10 @@ Git can still see cleartext metadata:
- plaintext shard hashes
- backup cadence and which encrypted shards changed

Git cannot read message text, chat names, sender names, or media metadata without
an age identity. Binary media files archived in `~/.telecrawl/media/` are local
only and are not included in backup shards.
Git cannot read message text, chat names, sender names, contact phone numbers,
contact usernames, avatar path metadata, or media metadata without an age
identity. Binary media files and cached avatar files archived in
`~/.telecrawl/media/` are local only and are not included in backup shards.

Keep `~/.telecrawl/age.key` private. If you lose it and no other recipient can
decrypt the backup, the encrypted backup cannot be restored.
Expand Down
142 changes: 140 additions & 2 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"sort"
"strings"
"time"
"unicode"

"github.com/openclaw/telecrawl/internal/backup"
"github.com/openclaw/telecrawl/internal/store"
Expand Down Expand Up @@ -102,6 +103,8 @@ func (r *runtime) dispatch(args []string) error {
return r.runChats(args[1:])
case "folders":
return r.runFolders(args[1:])
case "contacts":
return r.runContacts(args[1:])
case "topics":
return r.runTopics(args[1:])
case "messages":
Expand Down Expand Up @@ -257,14 +260,14 @@ func storeImportResult(ctx context.Context, st *store.Store, result *telegramdes
}
refreshImportMediaStats(result)
if strings.TrimSpace(chatFilter) == "" {
return st.ReplaceAll(ctx, result.Stats, result.Chats, result.Folders, result.FolderChats, result.Topics, result.Messages)
return st.ReplaceAll(ctx, result.Stats, result.Contacts, result.Chats, result.Folders, result.FolderChats, result.Topics, result.Messages)
}
if len(result.Chats) == 0 {
return fmt.Errorf("telegram import returned no chats for --chat %s", chatFilter)
}
for _, chat := range result.Chats {
partial := importResultForChat(*result, chat.JID)
if err := st.UpsertChat(ctx, partial.Stats, chat.JID, partial.Chats, partial.Folders, partial.FolderChats, partial.Topics, partial.Messages); err != nil {
if err := st.UpsertChat(ctx, partial.Stats, chat.JID, partial.Contacts, partial.Chats, partial.Folders, partial.FolderChats, partial.Topics, partial.Messages); err != nil {
return err
}
}
Expand Down Expand Up @@ -394,6 +397,29 @@ func importResultForChat(result telegramdesktop.ImportResult, chatJID string) te
out.Messages = append(out.Messages, message)
}
}
out.Contacts = contactsForMessages(result.Contacts, out.Messages, chatJID)
return out
}

// contactsForMessages filters contacts down to the ones whose JID is either
// chatJID or appears as the chat or sender JID of one of messages. JIDs that
// are empty or whitespace-only are never used as match keys. The relative
// order of contacts is preserved in the result.
func contactsForMessages(contacts []store.Contact, messages []store.Message, chatJID string) []store.Contact {
	wanted := map[string]struct{}{}
	// remember records a JID as relevant, skipping blank values.
	remember := func(jid string) {
		if strings.TrimSpace(jid) == "" {
			return
		}
		wanted[jid] = struct{}{}
	}

	remember(chatJID)
	for _, msg := range messages {
		remember(msg.ChatJID)
		remember(msg.SenderJID)
	}

	selected := make([]store.Contact, 0, len(wanted))
	for _, candidate := range contacts {
		if _, found := wanted[candidate.JID]; found {
			selected = append(selected, candidate)
		}
	}
	return selected
}

Expand Down Expand Up @@ -440,6 +466,116 @@ func (r *runtime) runFolders(args []string) error {
})
}

// runContacts implements the "contacts" command. When the first argument is
// "export" the remaining arguments are handed to runContactsExport; otherwise
// it parses the --limit flag (default 100), rejects positional arguments, and
// prints the contacts returned by the store.
func (r *runtime) runContacts(args []string) error {
	if len(args) > 0 && args[0] == "export" {
		return r.runContactsExport(args[1:])
	}

	var limit int
	flags := flag.NewFlagSet("telecrawl contacts", flag.ContinueOnError)
	// Flag parse errors are surfaced through usageErr, not printed by the flag package.
	flags.SetOutput(io.Discard)
	flags.IntVar(&limit, "limit", 100, "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("contacts takes flags only"))
	}

	listAndPrint := func(st *store.Store) error {
		contacts, err := st.ListContacts(r.ctx, limit)
		if err != nil {
			return err
		}
		return r.print(contacts)
	}
	return r.withStore(listAndPrint)
}

// contactExport is the top-level payload printed by runContactsExport. It
// wraps the exported contacts under a single "contacts" JSON key.
type contactExport struct {
// Contacts holds the entries produced by exportContacts.
Contacts []exportedContact `json:"contacts"`
}

// exportedContact is one entry of a contact export: a display name together
// with the phone numbers attached to it.
type exportedContact struct {
// DisplayName is serialized as "display_name".
DisplayName string `json:"display_name"`
// PhoneNumbers is serialized as "phone_numbers".
PhoneNumbers []string `json:"phone_numbers"`
}

// runContactsExport implements "contacts export". It accepts no positional
// arguments, loads contacts from the store via ExportContacts, converts them
// with exportContacts, and prints the result wrapped in a contactExport.
func (r *runtime) runContactsExport(args []string) error {
	flags := flag.NewFlagSet("telecrawl contacts export", flag.ContinueOnError)
	// Flag parse errors are surfaced through usageErr, not printed by the flag package.
	flags.SetOutput(io.Discard)
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("contacts export takes no arguments"))
	}

	export := func(st *store.Store) error {
		contacts, err := st.ExportContacts(r.ctx)
		if err != nil {
			return err
		}
		payload := contactExport{Contacts: exportContacts(contacts)}
		return r.print(payload)
	}
	return r.withStore(export)
}

// exportContacts converts stored contacts into export entries. A contact is
// included only when it has both a usable display name (per
// contactDisplayName) and a non-blank phone number; each entry carries exactly
// one phone number. The result is always non-nil.
func exportContacts(contacts []store.Contact) []exportedContact {
	exported := make([]exportedContact, 0, len(contacts))
	for _, c := range contacts {
		displayName := contactDisplayName(c)
		number := strings.TrimSpace(c.Phone)
		if displayName != "" && number != "" {
			exported = append(exported, exportedContact{
				DisplayName:  displayName,
				PhoneNumbers: []string{number},
			})
		}
	}
	return exported
}

// contactDisplayName picks a human-readable name for contact. The full name
// is preferred; if cleanContactName rejects it, "FirstName LastName" (trimmed)
// is tried instead. An empty string means no acceptable name was found.
func contactDisplayName(contact store.Contact) string {
	full := cleanContactName(contact.FullName, contact)
	if full != "" {
		return full
	}
	combined := strings.TrimSpace(contact.FirstName + " " + contact.LastName)
	return cleanContactName(combined, contact)
}

// cleanContactName returns name with surrounding whitespace removed, or ""
// when the name is not usable as a display name. A name is rejected when it
// is blank, equals the contact's trimmed phone, JID, username, or LID, starts
// with "@", or looks like a phone number.
func cleanContactName(name string, contact store.Contact) string {
	trimmed := strings.TrimSpace(name)
	if trimmed == "" {
		return ""
	}

	// A "name" that merely repeats one of the contact's identifiers is not a name.
	identifiers := []string{contact.Phone, contact.JID, contact.Username, contact.LID}
	for _, identifier := range identifiers {
		if trimmed == strings.TrimSpace(identifier) {
			return ""
		}
	}

	if strings.HasPrefix(trimmed, "@") || looksLikePhone(trimmed) {
		return ""
	}
	return trimmed
}

// looksLikePhone reports whether value, after trimming surrounding
// whitespace, consists solely of digits and the separator characters
// " +()-." and contains at least five digits.
func looksLikePhone(value string) bool {
	const separators = " +()-."

	digitCount := 0
	for _, ch := range strings.TrimSpace(value) {
		if unicode.IsDigit(ch) {
			digitCount++
			continue
		}
		// Anything that is neither a digit nor a separator rules the value out.
		if !strings.ContainsRune(separators, ch) {
			return false
		}
	}
	return digitCount >= 5
}

func (r *runtime) runTopics(args []string) error {
fs := flag.NewFlagSet("telecrawl topics", flag.ContinueOnError)
fs.SetOutput(io.Discard)
Expand Down Expand Up @@ -765,6 +901,8 @@ usage:
telecrawl [--json] import [--path PATH] [--chat ID] [--dialogs-limit N] [--messages-limit N] [--fetch-media]
telecrawl [--json] status
telecrawl [--json] folders
telecrawl [--json] contacts [--limit N]
telecrawl --json contacts export
telecrawl [--json] chats [--limit N] [--unread] [--folder ID]
telecrawl [--json] topics --chat ID [--limit N]
telecrawl [--json] messages [--chat ID] [--topic ID] [--limit N] [--after DATE]
Expand Down
Loading