Skip to content

Commit

Permalink
facts script improvements
Browse files (browse the repository at this point in the history)
  • Loading branch information
josancamon19 committed Nov 28, 2024
1 parent 0bef2cf commit b64becc
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 37 deletions.
63 changes: 31 additions & 32 deletions backend/scripts/rag/facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,16 @@
import firebase_admin

from _shared import *
from database._client import *
from models.facts import Fact, FactDB

firebase_admin.initialize_app()
from database.auth import get_user_name
from utils.llm import new_facts_extractor
import database.facts as facts_db


def get_facts_from_memory(memories: List[dict], uid: str) -> List[Tuple[str, List[Fact]]]:
def get_facts_from_memories(memories: List[dict], uid: str) -> List[Tuple[str, List[Fact]]]:
all_facts = {}
user_name = get_user_name(uid)
print('User:', user_name)
chunks = [memories[i:i + 50] for i in range(0, len(memories), 50)]
chunks = [memories[i:i + 25] for i in range(0, len(memories), 25)]

def execute(chunk):
only_facts: List[Fact] = []
Expand All @@ -37,40 +34,42 @@ def execute(chunk):

[t.start() for t in threads]
[t.join() for t in threads]
data: List[Tuple[str, List[Fact]]] = []

for key, value in all_facts.items():
data.append([key, value])
memory_id, facts = key, value
memory = next((m for m in memories if m['id'] == memory_id), None)
parsed_facts = []
for fact in facts:
parsed_facts.append(FactDB.from_fact(fact, uid, memory['id'], memory['structured']['category']))
facts_db.save_facts(uid, [fact.dict() for fact in parsed_facts])

return data


def execute_for_user(uid: str):
facts_db.delete_facts(uid)

memories = memories_db.get_memories(uid, limit=2000)
data: List[Tuple[str, List[Fact]]] = get_facts_from_memory(memories, uid)
parsed_facts = []
for item in data:
memory_id, facts = item
memory = next((m for m in memories if m['id'] == memory_id), None)
for fact in facts:
parsed_facts.append(FactDB.from_fact(fact, uid, memory['id'], memory['structured']['category']))
facts_db.save_facts(uid, [fact.dict() for fact in parsed_facts])
get_facts_from_memories(memories, uid)


def script_migrate_users():
uids = get_users_uid()
print('Migrating', len(uids), 'users')
# uids = ['DX8n89KAmUaG9O7Qvj8xTi81Zu12']

threads = []
for uid in uids:
t = threading.Thread(target=execute_for_user, args=(uid,))
threads.append(t)

chunk_size = 25
chunks = [threads[i:i + chunk_size] for i in range(0, len(threads), chunk_size)]
for i, chunk in enumerate(chunks):
print('STARTING CHUNK', i + 1)
[t.start() for t in chunk]
[t.join() for t in chunk]
# uids = get_users_uid()
# print('Migrating', len(uids), 'users')
uids = ['yOnlnL4a3CYHe6Zlfotrngz9T3w2']
execute_for_user(uids[0])

# threads = []
# for uid in uids:
# t = threading.Thread(target=execute_for_user, args=(uid,))
# threads.append(t)

# chunk_size = 1
# chunks = [threads[i:i + chunk_size] for i in range(0, len(threads), chunk_size)]
# for i, chunk in enumerate(chunks):
# print('STARTING CHUNK', i + 1)
# [t.start() for t in chunk]
# [t.join() for t in chunk]


if __name__ == '__main__':
script_migrate_users()
7 changes: 3 additions & 4 deletions backend/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,14 +495,13 @@ def obtain_emotional_message(uid: str, memory: Memory, context: str, emotion: st
class Facts(BaseModel):
facts: List[Fact] = Field(
min_items=0,
# max_items=3,
description="List of new user facts, preferences, interests, or topics.",
max_items=3,
description="List of **new** facts.",
)


def new_facts_extractor(uid: str, segments: List[TranscriptSegment]) -> List[Fact]:
user_name, facts_str = get_prompt_facts(uid)

content = TranscriptSegment.segments_as_string(segments, user_name=user_name)
if not content or len(content) < 100: # less than 100 chars, probably nothing
return []
Expand Down Expand Up @@ -552,7 +551,7 @@ def new_facts_extractor(uid: str, segments: List[TranscriptSegment]) -> List[Fac
**Output Instructions**:
- Provide up to 5 valuable new facts.
- Provide none up to 3 valuable new facts.
- If you do not find any new or noteworthy facts, provide an empty list.
- Do not include any explanations or additional text; only list the facts.
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/memories/facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ def get_prompt_data(uid: str) -> Tuple[str, List[Fact], List[Fact]]:
# TODO: filter only reviewed True
generated = [Fact(**fact) for fact in existing_facts if not fact['manually_added']]
user_name = get_user_name(uid)
print('get_prompt_data', user_name, len(user_made), len(generated))
# print('get_prompt_data', user_name, len(user_made), len(generated))
return user_name, user_made, generated

0 comments on commit b64becc

Please sign in to comment.