
Commit 20b525c

feat(arkitect): responses

Parent: d8dd7f2

28 files changed (+20,300 / -152 lines)

arkitect/core/component/memory/in_memory_memory_service.py

Lines changed: 12 additions & 3 deletions

@@ -36,9 +36,18 @@
 past interactions to help answer the user’s new question.
 """
 
+DEFAULT_SEARCH_LLM_MODEL = "doubao-1-5-pro-32k-250115"
+
 
 class InMemoryMemoryService(BaseMemoryService):
-    def __init__(self) -> None:
+    def __init__(
+        self,
+        default_search_model: str = DEFAULT_SEARCH_LLM_MODEL,
+        default_search_prompt: str = DEFAULT_SEARCH_MEM_PROMPT,
+    ) -> None:
+        self.default_search_model = default_search_model
+        self.default_search_prompt = default_search_prompt
+
         self.memory: dict = {}
         self._cached_query: dict = {}
         self._llm = AsyncArk()
@@ -80,11 +89,11 @@ async def search_memory(
             content = format_ark_message_as_string(memory)
             results += content
         summary = await self._llm.chat.completions.create(
-            model="doubao-1-5-pro-32k-250115",
+            model=self.default_search_model,
             messages=[
                 {
                     "role": "system",
-                    "content": DEFAULT_SEARCH_MEM_PROMPT,
+                    "content": self.default_search_prompt,
                 },
                 {
                     "role": "user",
Lines changed: 99 additions & 0 deletions

@@ -0,0 +1,99 @@
+import json
+import os
+import re
+import time
+
+from dotenv import load_dotenv
+from firecrawl import FirecrawlApp
+
+
+def extract_listing_id(url):
+    """Extracts the listing ID from a PropertyGuru URL."""
+    match = re.search(r"-(\d+)$", url)
+    if match:
+        return match.group(1)
+    return None
+
+
+def crawl_and_save_listings(
+    json_filepath="listings.json", output_folder="crawled_listings"
+):
+    """
+    Reads listing URLs from a JSON file, crawls them using Firecrawl,
+    and saves the content as markdown files.
+    """
+    load_dotenv()  # Load environment variables from .env file, if present
+    api_key = os.getenv("FIRECRAWL_API_KEY")
+
+    if not api_key:
+        print("Error: FIRECRAWL_API_KEY environment variable not set.")
+        return
+
+    try:
+        print(f"Reading listings from {json_filepath}...")
+        with open(json_filepath, "r", encoding="utf-8") as f:
+            listings = json.load(f)
+    except FileNotFoundError:
+        print(f"Error: {json_filepath} not found.")
+        return
+    except json.JSONDecodeError:
+        print(f"Error: Could not decode JSON from {json_filepath}.")
+        return
+
+    if not os.path.exists(output_folder):
+        print(f"Creating output folder: {output_folder}...")
+        os.makedirs(output_folder)
+
+    app = FirecrawlApp(api_key=api_key)
+    print(f"Found {len(listings)} listings to crawl.")
+
+    for i, listing in enumerate(listings):
+        url = listing.get("url")
+        if not url:
+            print(f"Skipping listing {i+1} due to missing URL.")
+            continue
+
+        listing_id = extract_listing_id(url)
+        if not listing_id:
+            print(f"Skipping URL (could not extract ID): {url}")
+            continue
+
+        output_md_filepath = os.path.join(output_folder, f"{listing_id}.md")
+        print(f"Crawling ({i+1}/{len(listings)}): {url} (ID: {listing_id})")
+
+        try:
+            crawl_result = app.scrape_url(
+                url,
+                formats=["markdown"],
+                only_main_content=True,
+            )
+
+            if crawl_result and crawl_result.markdown:
+                with open(output_md_filepath, "w", encoding="utf-8") as md_file:
+                    md_file.write(crawl_result.markdown)
+                print(f"Successfully saved: {output_md_filepath}")
+            else:
+                print(
+                    f"Failed to get markdown content for {url}. Response: {crawl_result}"
+                )
+                # Save an empty file or error message if preferred
+                with open(output_md_filepath, "w", encoding="utf-8") as md_file:
+                    md_file.write(
+                        f"# Error crawling URL: {url}\n\nFirecrawl response did not contain markdown."
+                    )
+
+        except Exception as e:
+            print(f"Error crawling {url}: {e}")
+            # Optionally, save an error message to the file
+            with open(output_md_filepath, "w", encoding="utf-8") as md_file:
+                md_file.write(f"# Error crawling URL: {url}\n\nException: {e}")
+
+        # Add a small delay to be respectful to the server and API rate limits
+        if i < len(listings) - 1:  # Don't sleep after the last item
+            time.sleep(1)  # 1-second delay
+
+    print("Crawling process completed.")
+
+
+if __name__ == "__main__":
+    crawl_and_save_listings()
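
The new script only reads the url key of each entry (listing.get("url")), and extract_listing_id expects the URL to end in a hyphen followed by digits. A minimal sketch of an input file satisfying both assumptions; the PropertyGuru-style URLs below are made up for illustration:

import json

# Hypothetical listings.json: each entry needs a "url" key whose value ends
# in "-<digits>", which extract_listing_id captures as the listing ID.
sample_listings = [
    {"url": "https://www.propertyguru.com.sg/listing/example-condo-12345678"},
    {"url": "https://www.propertyguru.com.sg/listing/example-flat-87654321"},
]

with open("listings.json", "w", encoding="utf-8") as f:
    json.dump(sample_listings, f, indent=2)

# extract_listing_id returns "12345678" for the first URL, so its crawled
# page would be written to crawled_listings/12345678.md.
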
