Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
96305ff
message model and message fetching
Dishant1804 May 23, 2025
ddfeb01
checks and lints
Dishant1804 May 23, 2025
bdd2e59
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 23, 2025
18fce4e
tests and command fix
Dishant1804 May 25, 2025
86b09a2
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 25, 2025
c731b5f
spellin fixed
Dishant1804 May 25, 2025
fed1d2d
Merge branch 'main' into message_model
Dishant1804 May 26, 2025
243c2d7
Merge branch 'main' into message_model
Dishant1804 May 27, 2025
d06a386
Merge branch 'main' into message_model
Dishant1804 May 27, 2025
1c4a5ff
Merge branch 'main' into message_model
Dishant1804 May 28, 2025
4ab44a3
message model with updated fetch
Dishant1804 May 28, 2025
7a0e239
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 28, 2025
ebc9009
changes and code rabbit suggestions
Dishant1804 May 28, 2025
6b2554c
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 28, 2025
9c737e4
Merge branch 'main' into message_model
Dishant1804 May 28, 2025
0d67c1f
Merge branch 'main' into message_model
Dishant1804 May 29, 2025
2182427
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 30, 2025
9ba5e9f
Update code
arkid15r Jun 1, 2025
0f9dd70
suggestions implemented
Dishant1804 Jun 3, 2025
02db4ff
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 3, 2025
a530ae5
chunk model along with embeddings
Dishant1804 Jun 6, 2025
93e1a5b
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 6, 2025
46076df
pre commit
Dishant1804 Jun 6, 2025
4cb8d97
cspell checks
Dishant1804 Jun 6, 2025
532b104
code rabbit suggestions
Dishant1804 Jun 7, 2025
fdf7936
Merge branch 'main' into message_model
Dishant1804 Jun 7, 2025
2bed7e0
removed files related to chunking
Dishant1804 Jun 8, 2025
78f6200
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 8, 2025
f74e546
suggestions implemented
Dishant1804 Jun 11, 2025
e3381c1
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 11, 2025
6d5322f
code rabbit suggestions
Dishant1804 Jun 11, 2025
4cb6068
Run make update
arkid15r Jun 11, 2025
f1b573f
Update code
arkid15r Jun 11, 2025
e60be10
Merge branch 'main' into message_model
arkid15r Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions backend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ slack-sync-data:
@echo "Syncing Slack data"
@CMD="python manage.py slack_sync_data" $(MAKE) exec-backend-command

# Sync Slack messages: runs the slack_sync_messages management command
# through the exec-backend-command target.
slack-sync-messages:
@echo "Syncing Slack messages"
@CMD="python manage.py slack_sync_messages" $(MAKE) exec-backend-command

sync-data: \
update-data \
enrich-data \
Expand Down
15 changes: 15 additions & 0 deletions backend/apps/slack/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from apps.slack.models.conversation import Conversation
from apps.slack.models.event import Event
from apps.slack.models.member import Member
from apps.slack.models.message import Message
from apps.slack.models.workspace import Workspace


Expand Down Expand Up @@ -127,6 +128,19 @@ def approve_suggested_users(self, request, queryset):
approve_suggested_users.short_description = "Approve the suggested user (if only one exists)"


class MessageAdmin(admin.ModelAdmin):
    """Admin configuration for the Slack ``Message`` model."""

    # Related objects selected through autocomplete lookups.
    autocomplete_fields = ("author", "conversation", "parent_message")
    # Columns of the change-list page.
    list_display = ("text", "has_replies", "author")
    # Fields matched by the admin search box.
    search_fields = ("slack_message_id", "text")


class WorkspaceAdmin(admin.ModelAdmin):
search_fields = (
"name",
Expand All @@ -137,4 +151,5 @@ class WorkspaceAdmin(admin.ModelAdmin):
admin.site.register(Conversation, ConversationAdmin)
admin.site.register(Event, EventAdmin)
admin.site.register(Member, MemberAdmin)
admin.site.register(Message, MessageAdmin)
admin.site.register(Workspace, WorkspaceAdmin)
308 changes: 308 additions & 0 deletions backend/apps/slack/management/commands/slack_sync_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
"""A command to populate Slack messages data for all conversations."""

import logging
import time

from django.core.management.base import BaseCommand
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

from apps.slack.models import Conversation, Member, Message, Workspace

logger = logging.getLogger(__name__)


class Command(BaseCommand):
help = "Populate messages for all Slack conversations"

def add_arguments(self, parser):
    """Register the command's options on ``parser``.

    Options:
        --batch-size: messages requested per API call (int, default 200).
        --delay: pause between API requests in seconds (float, default 0.5).
        --channel-id: limit the sync to one channel ID (str, optional).
    """
    option_specs = (
        (
            "--batch-size",
            {
                "type": int,
                "default": 200,
                "help": "Number of messages to retrieve per request",
            },
        ),
        (
            "--delay",
            {
                "type": float,
                "default": 0.5,
                "help": "Delay between API requests in seconds",
            },
        ),
        (
            "--channel-id",
            {
                "type": str,
                "help": "Specific channel ID to fetch messages from",
            },
        ),
    )
    # Registration order is kept so the generated help output is unchanged.
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

def handle(self, *args, **options):
    """Sync Slack messages for every workspace stored in the database.

    Reads ``batch_size``, ``channel_id`` and ``delay`` from ``options``. For
    each workspace with a bot token, a ``WebClient`` is created and every
    matching conversation is handed to ``_fetch_conversation`` with reply
    fetching enabled. Workspaces without a bot token are reported and skipped.
    """
    batch_size = options["batch_size"]
    channel_id = options["channel_id"]
    delay = options["delay"]

    workspaces = Workspace.objects.all()
    if not workspaces.exists():
        self.stdout.write(self.style.WARNING("No workspaces found in the database"))
        return

    for workspace in workspaces:
        self.stdout.write(f"\nProcessing workspace: {workspace.name}")

        if not (bot_token := workspace.bot_token):
            self.stdout.write(self.style.ERROR(f"No bot token found for {workspace}"))
            continue

        client = WebClient(token=bot_token)

        # Always scope to the current workspace. Filtering by channel ID alone
        # would return the same conversation on every workspace iteration and
        # query it with a client built from another workspace's bot token.
        conversations = Conversation.objects.filter(workspace=workspace)
        if channel_id:
            conversations = conversations.filter(slack_channel_id=channel_id)

        for conversation in conversations:
            self._fetch_conversation(
                batch_size=batch_size,
                client=client,
                conversation=conversation,
                delay=delay,
                include_replies=True,
            )

    self.stdout.write(self.style.SUCCESS("\nFinished processing all workspaces"))

def _fetch_conversation(
    self,
    client: WebClient,
    conversation: Conversation,
    batch_size: int,
    delay: float,
    *,
    include_replies: bool = True,
):
    """Sync one conversation: its top-level messages, then optionally replies.

    ``_fetch_messages`` returns the messages that have replies; when
    ``include_replies`` is true each of them is passed to ``_fetch_replies``.
    A ``SlackApiError`` reaching this method is reported on stdout and not
    re-raised.
    """
    self.stdout.write(f"\nProcessing channel: {conversation.name}")

    try:
        threaded_parents = self._fetch_messages(
            client=client,
            conversation=conversation,
            batch_size=batch_size,
            delay=delay,
        )

        # With replies disabled there is nothing left to expand.
        parents_to_expand = threaded_parents if include_replies else []
        for parent in parents_to_expand:
            self._fetch_replies(
                client=client,
                conversation=conversation,
                message=parent,
                delay=delay,
            )

        done_text = f"Finished processing messages from {conversation.name}"
        self.stdout.write(self.style.SUCCESS(done_text))

    except SlackApiError as e:
        failure_text = (
            f"Failed to fetch messages for {conversation.name}: {e.response['error']}"
        )
        self.stdout.write(self.style.ERROR(failure_text))

def _fetch_messages(
    self, client: WebClient, conversation: Conversation, batch_size: int, delay: float
) -> list[Message]:
    """Page through a conversation's history and store its top-level messages.

    Only history newer than the most recently stored message of the
    conversation is requested. Entries whose ``thread_ts`` differs from their
    ``ts`` are skipped here. Every page is persisted with
    ``Message.bulk_save``. Paging stops when no next cursor is returned or on
    the first ``SlackApiError``, which is reported on stdout.

    Returns:
        The created messages whose ``has_replies`` is truthy.
    """
    threaded_parents = []

    newest_stored = (
        Message.objects.filter(conversation=conversation).order_by("-created_at").first()
    )
    # The lower bound is the same for every page, so derive it once.
    oldest = newest_stored.created_at.timestamp() if newest_stored else None

    next_cursor = None
    while True:
        try:
            response = client.conversations_history(
                channel=conversation.slack_channel_id,
                cursor=next_cursor,
                limit=batch_size,
                oldest=oldest,
            )
            self._handle_slack_response(response, "conversations_history")

            page = []
            for payload in response.get("messages", []):
                thread_ts = payload.get("thread_ts")
                # A thread_ts different from ts means this entry is not a parent.
                if thread_ts and payload.get("ts") != thread_ts:
                    continue

                message = self._create_message_from_data(
                    client=client,
                    conversation=conversation,
                    message_data=payload,
                )
                if not message:
                    continue

                page.append(message)
                if message.has_replies:
                    threaded_parents.append(message)

            if page:
                Message.bulk_save(page)

            next_cursor = response.get("response_metadata", {}).get("next_cursor")
            if not next_cursor:
                break
            if delay:
                time.sleep(delay)

        except SlackApiError as e:
            self.stdout.write(
                self.style.ERROR(f"Error fetching messages: {e.response['error']}")
            )
            break

    return threaded_parents

def _fetch_replies(
    self,
    client: WebClient,
    conversation: Conversation,
    message: Message,
    delay: float,
):
    """Fetch and store the replies of one thread parent message.

    Replies are requested from the newest stored reply of ``message`` onwards
    (inclusive) and paged with the response cursor. A ``SlackApiError`` is
    reported on stdout; replies collected before the error are still saved.

    Args:
        client: Slack client used for ``conversations_replies``.
        conversation: Conversation the thread belongs to.
        message: Thread parent; a falsy value makes the call a no-op.
        delay: Seconds to sleep between pages (skipped when falsy).
    """
    if not message:
        return

    replies_to_save = []

    try:
        latest_reply = (
            Message.objects.filter(
                conversation=conversation,
                parent_message=message,
            )
            .order_by("-created_at")
            .first()
        )
        oldest_ts = latest_reply.created_at.timestamp() if latest_reply else None

        cursor = None
        has_more = True

        while has_more:
            params = {
                "channel": conversation.slack_channel_id,
                "ts": message.slack_message_id,
                "cursor": cursor,
                "limit": 100,
                "inclusive": True,
            }
            if oldest_ts:
                params["oldest"] = str(oldest_ts)

            response = client.conversations_replies(**params)
            self._handle_slack_response(response, "conversations_replies")

            messages_in_response = response.get("messages", [])
            if not messages_in_response:
                break

            for reply_data in messages_in_response:
                # Skip the thread parent by timestamp rather than by position,
                # so a page that does not start with the parent cannot lose a
                # real reply. The parent's ts equals the requested thread ts
                # (and its own thread_ts).
                reply_ts = reply_data.get("ts")
                if reply_ts in (message.slack_message_id, reply_data.get("thread_ts")):
                    continue

                reply = self._create_message_from_data(
                    client=client,
                    message_data=reply_data,
                    conversation=conversation,
                    parent_message=message,
                )
                if reply:
                    replies_to_save.append(reply)

            cursor = response.get("response_metadata", {}).get("next_cursor")
            has_more = bool(cursor)

            if delay and has_more:
                time.sleep(delay)

    except SlackApiError as e:
        # Name the affected message; the previous text printed the error code
        # where the message id was meant to be.
        self.stdout.write(
            self.style.ERROR(
                "Failed to fetch thread replies for message "
                f"{message.slack_message_id}: {e.response['error']}"
            )
        )

    if replies_to_save:
        # Persist in slices so a very long thread is not written in one call.
        batch_size = 1000
        for i in range(0, len(replies_to_save), batch_size):
            batch = replies_to_save[i : i + batch_size]
            Message.bulk_save(batch)

def _create_message_from_data(
    self,
    client: WebClient,
    message_data: dict,
    conversation: Conversation,
    *,
    parent_message: Message | None = None,
) -> Message | None:
    """Build an unsaved ``Message`` from a raw Slack message payload.

    Returns ``None`` when the payload is a channel join/leave or bot message,
    has no text, attachments, files or blocks, carries neither ``user`` nor
    ``bot_id``, when the author cannot be fetched from Slack, or when any
    other exception occurs (the exception is logged). An author missing from
    the database is fetched with ``users_info`` and stored first.
    """
    skipped_subtypes = {"channel_join", "channel_leave", "bot_message"}
    if message_data.get("subtype") in skipped_subtypes:
        return None

    content_keys = ("text", "attachments", "files", "blocks")
    if not any(message_data.get(key) for key in content_keys):
        return None

    try:
        slack_user_id = message_data.get("user") or message_data.get("bot_id")
        if not slack_user_id:
            return None

        try:
            author = Member.objects.get(
                slack_user_id=slack_user_id, workspace=conversation.workspace
            )
        except Member.DoesNotExist:
            # Unknown author: pull the profile from Slack and store it.
            try:
                user_info = client.users_info(user=slack_user_id)
                self._handle_slack_response(user_info, "users_info")

                author = Member.update_data(
                    user_info["user"], conversation.workspace, save=True
                )
            except SlackApiError as e:
                warning_text = (
                    f"Failed to fetch user data for {slack_user_id}: {e.response['error']}"
                )
                self.stdout.write(self.style.WARNING(warning_text))
                return None
            else:
                self.stdout.write(self.style.SUCCESS(f"Created new member: {slack_user_id}"))

        return Message.update_data(
            data=message_data,
            conversation=conversation,
            author=author,
            parent_message=parent_message,
            save=False,
        )
    except Exception:
        logger.exception("Error creating message from data")
        return None

def _handle_slack_response(self, response, api_method):
    """Report a failed Slack API response; never raises.

    When the response's ``ok`` flag is missing or falsy, an error naming
    ``api_method`` (plus the response's ``error`` value, when present) is
    logged and written to stdout. Successful responses produce no output.

    NOTE(review): this only reports; callers appear to rely on the client
    raising ``SlackApiError`` for failed calls -- confirm.

    Args:
        response: Slack API response supporting ``.get``.
        api_method: Name of the API method, used in the error text.
    """
    if response.get("ok"):
        return

    error_message = f"{api_method} API call failed"
    # Keep Slack's error code so the failure can be diagnosed from the log.
    if error_code := response.get("error"):
        error_message = f"{error_message}: {error_code}"

    logger.error(error_message)
    self.stdout.write(self.style.ERROR(error_message))
Loading