Skip to content

Commit 15b8c4a

Browse files
authored
Merge pull request #146 from mgajewskik/fix/supermemory-migration-v3-api
fix(migrate): support Supermemory v3 documents list API
2 parents 56dda38 + 5a6a490 commit 15b8c4a

File tree

2 files changed

+156
-41
lines changed

2 files changed

+156
-41
lines changed

tools/migrate/providers/base.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from abc import ABC, abstractmethod
2-
from typing import Dict, Any, AsyncGenerator, List
2+
from typing import Dict, Any, AsyncGenerator, List, Optional
3+
import asyncio
34
import httpx
45
from ..schemas import MigrationConfig, MigrationRecord
56
from ..utils import RateLimiter, logger
67

8+
79
class BaseProvider(ABC):
810
def __init__(self, config: MigrationConfig):
911
self.config = config
@@ -13,27 +15,46 @@ def __init__(self, config: MigrationConfig):
1315
async def close(self) -> None:
    """Release the provider's HTTP resources by closing the shared client."""
    await self.client.aclose()
1517

async def _get(
    self,
    url: str,
    headers: Optional[Dict[str, str]] = None,
    max_retries: int = 8,
) -> Any:
    """GET *url* through the shared client with rate limiting and 429 retry.

    Args:
        url: Absolute URL to fetch.
        headers: Optional request headers; an empty dict is sent when omitted.
        max_retries: Retries allowed after the initial attempt.

    Returns:
        The decoded JSON response body.

    Raises:
        httpx.HTTPError: For transport errors, non-429 error statuses, or a
            429 that persists through the final attempt.
        RuntimeError: Defensive guard; the loop always returns or raises first.
    """
    total_attempts = max_retries + 1
    for attempt in range(total_attempts):
        await self.rate_limiter.wait()
        try:
            response = await self.client.get(url, headers=headers or {})
            if response.status_code != 429:
                response.raise_for_status()
                return response.json()

            if attempt >= max_retries:
                # Out of retries: surface the 429 as an HTTPStatusError.
                response.raise_for_status()

            retry_after_header = response.headers.get("retry-after", "5")
            try:
                retry_after = float(retry_after_header)
            except (TypeError, ValueError):
                retry_after = 5.0

            # Honor the server hint, clamped to a sane [1s, 60s] window.
            wait_seconds = max(1.0, min(retry_after, 60.0))
            # Fixed: report the true attempt count (was .../max_retries,
            # which printed e.g. "9/8" on the final attempt).
            logger.warning(
                f"Rate limit hit. Waiting {wait_seconds:.1f}s... (attempt {attempt + 1}/{total_attempts})"
            )
            await asyncio.sleep(wait_seconds)
        except httpx.HTTPError as e:
            logger.error(f"HTTP Error: {e}")
            raise

    raise RuntimeError("Max retries exceeded")
3051

3152
@abstractmethod
async def connect(self) -> Dict[str, Any]:
    """Probe the provider's API; return a dict of connection statistics.

    Subclasses must implement this coroutine.
    """
3556

3657
@abstractmethod
def export(self) -> AsyncGenerator[MigrationRecord, None]:
    """Produce an async generator yielding every MigrationRecord.

    The base implementation is never usable; subclasses must override.
    """
    raise NotImplementedError
Lines changed: 118 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,166 @@
1-
from typing import Dict, Any, AsyncGenerator, List
1+
import asyncio
2+
from typing import Dict, Any, AsyncGenerator
23
from .base import BaseProvider
34
from ..schemas import MigrationRecord
45
from ..utils import logger
56

7+
68
class SupermemoryProvider(BaseProvider):
79
def __init__(self, config):
    """Set up the Supermemory endpoint and auth headers from *config*."""
    super().__init__(config)
    default_endpoint = "https://api.supermemory.ai"
    self.base_url = config.source_url or default_endpoint
    bearer = f"Bearer {config.api_key}"
    self.headers = {
        "Authorization": bearer,
        "Content-Type": "application/json",
    }
1416

1517
async def connect(self) -> Dict[str, Any]:
    """Verify API access by listing a single document via the v3 endpoint.

    Returns:
        Dict with an ``ok`` flag and a best-effort ``documents`` total, read
        from whichever pagination field this API version provides.

    Raises:
        Exception: Wraps any underlying failure; chained via ``from`` so the
            root cause stays in the traceback.
    """
    try:
        data = await self._post(
            f"{self.base_url}/v3/documents/list",
            json={"page": 1, "limit": 1, "includeContent": False},
            headers=self.headers,
        )
        pagination = data.get("pagination") or {}
        # The v3 API has shipped several names for the total; try each.
        total = (
            pagination.get("totalItems")
            or pagination.get("totalDocuments")
            or data.get("total")
            or 0
        )
        return {"ok": True, "documents": total}
    except Exception as e:
        # Chain the original error instead of discarding its traceback.
        raise Exception(f"Supermemory connection failed (v3): {e}") from e
34+
35+
async def _post(
    self,
    url: str,
    json: Dict[str, Any],
    headers: Dict[str, str],
    max_retries: int = 8,
) -> Any:
    """POST *json* to *url*, retrying on HTTP 429 up to *max_retries* times.

    Returns the decoded JSON body on success. Any non-429 error status
    raises immediately; a 429 on the final attempt raises as well.
    """
    attempt = 0
    while attempt <= max_retries:
        await self.rate_limiter.wait()
        resp = await self.client.post(url, json=json, headers=headers)

        if resp.status_code != 429:
            resp.raise_for_status()
            return resp.json()

        # Still rate limited on the last allowed attempt: surface the 429.
        if attempt >= max_retries:
            resp.raise_for_status()

        header_value = resp.headers.get("retry-after", "5")
        try:
            delay = float(header_value)
        except (TypeError, ValueError):
            delay = 5.0

        # Honor the server hint, clamped to the [1s, 60s] range.
        pause = max(1.0, min(delay, 60.0))
        logger.warning(
            f"[SUPERMEMORY] Rate limit hit. Waiting {pause:.1f}s (attempt {attempt + 1}/{max_retries})..."
        )
        await asyncio.sleep(pause)
        attempt += 1

    raise RuntimeError("Max retries exceeded while calling Supermemory API")
2266

2367
async def export(self) -> AsyncGenerator[MigrationRecord, None]:
    """Stream every Supermemory document as a MigrationRecord.

    Pages through POST /v3/documents/list, preferring the response's
    pagination metadata and falling back to a batch-size heuristic when
    no pagination fields are present.
    """
    try:
        logger.info("[SUPERMEMORY] Fetching documents...")
        page_number = 1
        page_size = 100
        exported = 0

        while True:
            payload = {"page": page_number, "limit": page_size, "includeContent": True}
            data = await self._post(
                f"{self.base_url}/v3/documents/list",
                json=payload,
                headers=self.headers,
            )
            # Different API revisions name the document array differently.
            docs = (
                data.get("memories", [])
                or data.get("documents", [])
                or data.get("data", [])
            )

            if not docs:
                if page_number == 1:
                    # Help diagnose schema drift when the very first page is empty.
                    keys = (
                        sorted(list(data.keys())) if isinstance(data, dict) else []
                    )
                    logger.warning(
                        f"[SUPERMEMORY] No documents found in first page. Response keys: {keys}"
                    )
                break

            for doc in docs:
                yield self._transform(doc)
                exported += 1
                if exported % 100 == 0:
                    logger.info(f"[SUPERMEMORY] Exported {exported} documents...")

            pagination = data.get("pagination") or {}
            current = pagination.get("currentPage", page_number)
            page_count = pagination.get("totalPages")
            more = pagination.get("hasNext")

            # Stop on explicit pagination signals; only fall back to the
            # short-batch heuristic when neither signal is present.
            finished = (
                (page_count is not None and current >= page_count)
                or more is False
                or (page_count is None and more is None and len(docs) < page_size)
            )
            if finished:
                break

            page_number += 1
    except Exception as e:
        logger.error(f"[SUPERMEMORY] Export failed: {e}")
        raise
50118

51119
def _transform(self, d: Dict) -> MigrationRecord:
    """Map a raw Supermemory document dict onto a MigrationRecord.

    Tolerates both snake_case and camelCase field names, numeric timestamps
    in seconds or milliseconds, and ISO date strings. Unparseable dates fall
    back to ``created_at = 0`` rather than aborting the migration.
    """
    from dateutil import parser

    created_at = 0
    created_at_raw = d.get("created_at") or d.get("createdAt")
    if created_at_raw:
        if isinstance(created_at_raw, (int, float)):
            # Values above ~1e12 are already milliseconds; smaller are seconds.
            created_at = int(
                created_at_raw if created_at_raw > 1e12 else created_at_raw * 1000
            )
        else:
            try:
                created_at = int(parser.parse(created_at_raw).timestamp() * 1000)
            except (ValueError, OverflowError, TypeError):
                # Best effort: keep created_at = 0 on unparseable dates.
                # Narrowed from a bare `except:` that also swallowed
                # KeyboardInterrupt/SystemExit.
                pass

    container_tags = d.get("containerTags") or []
    uid = (
        d.get("user_id")
        or d.get("owner_id")
        or d.get("containerTag")
        or (container_tags[0] if container_tags else None)
        or "default"
    )

    content = (
        d.get("content") or d.get("text") or d.get("body") or d.get("summary") or ""
    )

    return MigrationRecord(
        id=str(
            d.get("id")
            or d.get("document_id")
            or d.get("customId")
            or f"sm_{created_at}"
        ),
        uid=str(uid),
        content=content,
        tags=d.get("tags") or d.get("labels") or [],
        metadata={
            "provider": "supermemory",
            "source": d.get("source"),
            "url": d.get("url"),
            "container_tags": container_tags,
            "original_metadata": d.get("metadata", {}),
        },
        created_at=created_at,
    )

0 commit comments

Comments
 (0)