import asyncio
from typing import Any, AsyncGenerator, Dict

from .base import BaseProvider
from ..schemas import MigrationRecord
from ..utils import logger


class SupermemoryProvider(BaseProvider):
    """Export provider for the Supermemory v3 HTTP API.

    Uses ``POST /v3/documents/list`` both for the connectivity probe and the
    paginated export, and normalizes the (historically varying) response
    shapes into ``MigrationRecord`` objects.
    """

    def __init__(self, config):
        super().__init__(config)
        # Allow a self-hosted endpoint via config; fall back to the SaaS URL.
        self.base_url = config.source_url or "https://api.supermemory.ai"
        self.headers = {
            "Authorization": f"Bearer {config.api_key}",
            "Content-Type": "application/json",
        }

    async def connect(self) -> Dict[str, Any]:
        """Verify credentials/connectivity with a minimal list request.

        Returns:
            ``{"ok": True, "documents": <total count>}`` on success.

        Raises:
            Exception: wrapping any underlying HTTP or parse failure
                (chained via ``from e`` so the root cause is preserved).
        """
        try:
            data = await self._post(
                f"{self.base_url}/v3/documents/list",
                json={"page": 1, "limit": 1, "includeContent": False},
                headers=self.headers,
            )
            # Different API revisions report the total under different keys.
            pagination = data.get("pagination") or {}
            total = (
                pagination.get("totalItems")
                or pagination.get("totalDocuments")
                or data.get("total")
                or 0
            )
            return {"ok": True, "documents": total}
        except Exception as e:
            raise Exception(f"Supermemory connection failed (v3): {e}") from e

    async def _post(
        self,
        url: str,
        json: Dict[str, Any],
        headers: Dict[str, str],
        max_retries: int = 8,
    ) -> Any:
        """POST with rate limiting and bounded retry on HTTP 429.

        Honors the ``Retry-After`` header, clamped to [1, 60] seconds
        (default 5s when the header is missing or malformed). Any non-429
        error status raises immediately via ``raise_for_status``.
        """
        for attempt in range(max_retries + 1):
            await self.rate_limiter.wait()
            response = await self.client.post(url, json=json, headers=headers)

            if response.status_code != 429:
                response.raise_for_status()
                return response.json()

            # Out of retries: surface the 429 as an HTTP status error.
            if attempt >= max_retries:
                response.raise_for_status()

            retry_after_header = response.headers.get("retry-after", "5")
            try:
                retry_after = float(retry_after_header)
            except (TypeError, ValueError):
                retry_after = 5.0

            wait_seconds = max(1.0, min(retry_after, 60.0))
            logger.warning(
                f"[SUPERMEMORY] Rate limit hit. Waiting {wait_seconds:.1f}s (attempt {attempt + 1}/{max_retries})..."
            )
            await asyncio.sleep(wait_seconds)

        # Defensive: unreachable, since the final 429 attempt raises above.
        raise RuntimeError("Max retries exceeded while calling Supermemory API")

    async def export(self) -> AsyncGenerator[MigrationRecord, None]:
        """Yield every document as a ``MigrationRecord``, paging through the API."""
        try:
            logger.info("[SUPERMEMORY] Fetching documents...")
            page = 1
            limit = 100
            total = 0

            while True:
                data = await self._post(
                    f"{self.base_url}/v3/documents/list",
                    json={"page": page, "limit": limit, "includeContent": True},
                    headers=self.headers,
                )
                # The payload key has changed across API revisions.
                batch = (
                    data.get("memories", [])
                    or data.get("documents", [])
                    or data.get("data", [])
                )

                if not batch:
                    if page == 1:
                        # Help diagnose schema drift when nothing matched.
                        keys = (
                            sorted(list(data.keys())) if isinstance(data, dict) else []
                        )
                        logger.warning(
                            f"[SUPERMEMORY] No documents found in first page. Response keys: {keys}"
                        )
                    break

                for doc in batch:
                    yield self._transform(doc)
                    total += 1
                    if total % 100 == 0:
                        logger.info(f"[SUPERMEMORY] Exported {total} documents...")

                # Prefer explicit pagination metadata; fall back to batch size.
                pagination = data.get("pagination") or {}
                current_page = pagination.get("currentPage", page)
                total_pages = pagination.get("totalPages")
                has_next = pagination.get("hasNext")

                if total_pages is not None and current_page >= total_pages:
                    break
                if has_next is False:
                    break
                if total_pages is None and has_next is None and len(batch) < limit:
                    break

                page += 1
        except Exception as e:
            logger.error(f"[SUPERMEMORY] Export failed: {e}")
            raise

    def _transform(self, d: Dict) -> MigrationRecord:
        """Map one raw Supermemory document dict to a ``MigrationRecord``.

        Tolerates several field-name variants (snake/camel case, legacy
        keys) and falls back to defaults when fields are absent.
        """
        from dateutil import parser

        created_at = 0
        created_at_raw = d.get("created_at") or d.get("createdAt")
        if created_at_raw:
            if isinstance(created_at_raw, (int, float)):
                # Heuristic: values above ~1e12 are already in milliseconds.
                created_at = int(
                    created_at_raw if created_at_raw > 1e12 else created_at_raw * 1000
                )
            else:
                try:
                    created_at = int(parser.parse(created_at_raw).timestamp() * 1000)
                except (ValueError, TypeError, OverflowError):
                    # Unparseable timestamp: keep the 0 sentinel.
                    # (dateutil's ParserError subclasses ValueError.)
                    pass

        container_tags = d.get("containerTags") or []
        uid = (
            d.get("user_id")
            or d.get("owner_id")
            or d.get("containerTag")
            or (container_tags[0] if container_tags else None)
            or "default"
        )

        content = (
            d.get("content") or d.get("text") or d.get("body") or d.get("summary") or ""
        )

        return MigrationRecord(
            id=str(
                d.get("id")
                or d.get("document_id")
                or d.get("customId")
                or f"sm_{created_at}"
            ),
            uid=str(uid),
            content=content,
            tags=d.get("tags") or d.get("labels") or [],
            metadata={
                "provider": "supermemory",
                "source": d.get("source"),
                "url": d.get("url"),
                "container_tags": container_tags,
                "original_metadata": d.get("metadata", {}),
            },
            created_at=created_at,
        )