Skip to content

Gathering slack messages from conversations #1513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
96305ff
message model and message fetching
Dishant1804 May 23, 2025
ddfeb01
checks and lints
Dishant1804 May 23, 2025
bdd2e59
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 23, 2025
18fce4e
tests and command fix
Dishant1804 May 25, 2025
86b09a2
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 25, 2025
c731b5f
spellin fixed
Dishant1804 May 25, 2025
fed1d2d
Merge branch 'main' into message_model
Dishant1804 May 26, 2025
243c2d7
Merge branch 'main' into message_model
Dishant1804 May 27, 2025
d06a386
Merge branch 'main' into message_model
Dishant1804 May 27, 2025
1c4a5ff
Merge branch 'main' into message_model
Dishant1804 May 28, 2025
4ab44a3
message model with updated fetch
Dishant1804 May 28, 2025
7a0e239
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 28, 2025
ebc9009
changes and code rabbit suggestions
Dishant1804 May 28, 2025
6b2554c
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 28, 2025
9c737e4
Merge branch 'main' into message_model
Dishant1804 May 28, 2025
0d67c1f
Merge branch 'main' into message_model
Dishant1804 May 29, 2025
2182427
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 May 30, 2025
9ba5e9f
Update code
arkid15r Jun 1, 2025
0f9dd70
suggestions implemented
Dishant1804 Jun 3, 2025
02db4ff
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 3, 2025
a530ae5
chunk model along with embeddings
Dishant1804 Jun 6, 2025
93e1a5b
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 6, 2025
46076df
pre commit
Dishant1804 Jun 6, 2025
4cb8d97
cspell checks
Dishant1804 Jun 6, 2025
532b104
code rabbit suggestions
Dishant1804 Jun 7, 2025
fdf7936
Merge branch 'main' into message_model
Dishant1804 Jun 7, 2025
2bed7e0
removed files related to chunking
Dishant1804 Jun 8, 2025
78f6200
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 8, 2025
f74e546
suggestions implemented
Dishant1804 Jun 11, 2025
e3381c1
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 Jun 11, 2025
6d5322f
code rabbit suggestions
Dishant1804 Jun 11, 2025
4cb6068
Run make update
arkid15r Jun 11, 2025
f1b573f
Update code
arkid15r Jun 11, 2025
e60be10
Merge branch 'main' into message_model
arkid15r Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ shell-db:
slack-sync-data:
@echo "Syncing Slack data"
@CMD="python manage.py slack_sync_data" $(MAKE) exec-backend-command
@echo "Syncing Slack messages"
@CMD="python manage.py slack_sync_messages" $(MAKE) exec-backend-command

sync-data: \
update-data \
Expand Down
13 changes: 13 additions & 0 deletions backend/apps/slack/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from apps.slack.models.conversation import Conversation
from apps.slack.models.event import Event
from apps.slack.models.member import Member
from apps.slack.models.message import Message
from apps.slack.models.workspace import Workspace


Expand Down Expand Up @@ -127,6 +128,17 @@ def approve_suggested_users(self, request, queryset):
approve_suggested_users.short_description = "Approve the suggested user (if only one exists)"


class MessageAdmin(admin.ModelAdmin):
search_fields = (
"slack_message_id",
"text",
)
list_display = (
"text",
"is_thread",
)


class WorkspaceAdmin(admin.ModelAdmin):
search_fields = (
"name",
Expand All @@ -137,4 +149,5 @@ class WorkspaceAdmin(admin.ModelAdmin):
admin.site.register(Conversation, ConversationAdmin)
admin.site.register(Event, EventAdmin)
admin.site.register(Member, MemberAdmin)
admin.site.register(Message, MessageAdmin)
admin.site.register(Workspace, WorkspaceAdmin)
215 changes: 215 additions & 0 deletions backend/apps/slack/management/commands/slack_sync_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""A command to populate Slack messages data for all conversations."""

import logging
import time

from django.core.management.base import BaseCommand
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

from apps.slack.models import Conversation, Message, Workspace

logger = logging.getLogger(__name__)


class Command(BaseCommand):
help = "Populate messages for all Slack conversations"

def add_arguments(self, parser):
"""Define command line arguments."""
parser.add_argument(
"--batch-size",
type=int,
default=200,
help="Number of messages to retrieve per request",
)
parser.add_argument(
"--delay",
type=float,
default=0.5,
help="Delay between API requests in seconds",
)
parser.add_argument(
"--channel-id",
type=str,
help="Specific channel ID to fetch messages from",
)

def handle(self, *args, **options):
batch_size = options["batch_size"]
delay = options["delay"]
channel_id = options["channel_id"]
include_threads = True

workspaces = Workspace.objects.all()
if not workspaces.exists():
self.stdout.write(self.style.WARNING("No workspaces found in the database"))
return

for workspace in workspaces:
self.stdout.write(f"\nProcessing workspace: {workspace.name}")

if not (bot_token := workspace.bot_token):
self.stdout.write(self.style.ERROR(f"No bot token found for {workspace}"))
continue

client = WebClient(token=bot_token)

conversations = (
[Conversation.objects.get(slack_channel_id=channel_id)]
if channel_id
else Conversation.objects.filter(workspace=workspace)
)

for conversation in conversations:
self._fetch_messages_for_conversation(
client=client,
conversation=conversation,
batch_size=batch_size,
delay=delay,
include_threads=include_threads,
)

self.stdout.write(self.style.SUCCESS("\nFinished processing all workspaces"))

def _fetch_messages_for_conversation(
self,
client: WebClient,
conversation: Conversation,
batch_size: int,
delay: float,
*,
include_threads: bool,
):
"""Fetch messages for a single conversation from its beginning."""
self.stdout.write(f"\nProcessing channel: {conversation.name}")

try:
last_message = (
Message.objects.filter(conversation=conversation).order_by("-timestamp").first()
)
oldest = (
str(last_message.timestamp.timestamp())
if last_message and last_message.timestamp
else None
)

cursor = None
has_more = True

while has_more:
response = client.conversations_history(
channel=conversation.slack_channel_id,
cursor=cursor,
limit=batch_size,
oldest=oldest,
)
self._handle_slack_response(response, "conversations_history")
messages_data = response.get("messages", [])

messages = [
message
for message_data in messages_data
if (
message := self._create_message_from_data(
client, message_data, conversation, include_threads=include_threads
)
)
]

messages_count = len(messages)
if messages:
Message.bulk_save(messages)
self.stdout.write(f"Saved {messages_count} messages")

cursor = response.get("response_metadata", {}).get("next_cursor")
has_more = bool(cursor)

if delay and has_more:
time.sleep(delay)

self.stdout.write(
self.style.SUCCESS(f"Finished processing messages from {conversation.name}")
)

except SlackApiError as e:
self.stdout.write(
self.style.ERROR(
f"Failed to fetch messages for {conversation.name}: {e.response['error']}"
)
)

def _create_message_from_data(
self,
client: WebClient,
message_data: dict,
conversation: Conversation,
*,
include_threads: bool,
) -> Message | None:
"""Create Message instance from Slack API data."""
try:
if message_data.get("subtype") in ["channel_join", "channel_leave"]:
return None

if (
not message_data.get("text")
and not message_data.get("attachments")
and not message_data.get("files")
):
return None

message_ts = message_data.get("ts")
thread_ts = message_data.get("thread_ts")

is_actual_thread_parent = message_data.get("reply_count", 0) > 0
is_reply_in_thread = thread_ts is not None and thread_ts != message_ts

if include_threads and is_reply_in_thread:
return None

message_dict = {
"slack_message_id": message_ts,
"conversation": conversation,
"text": message_data.get("text", ""),
"timestamp": message_ts,
"is_thread": is_actual_thread_parent,
}

if include_threads and is_actual_thread_parent:
try:
thread_response = client.conversations_replies(
channel=conversation.slack_channel_id,
ts=message_ts,
)
self._handle_slack_response(thread_response, "conversations_replies")

if thread_response.get("ok"):
api_thread_messages = thread_response.get("messages", [])
current_parent_text = message_dict.get("text", "")
combined_text_list = [current_parent_text]

reply_texts = [
reply_msg_data.get("text", "")
for reply_msg_data in api_thread_messages[1:]
if reply_msg_data.get("text", "")
]
combined_text_list.extend(reply_texts)

message_dict["text"] = "\n\n".join(filter(None, combined_text_list))

except SlackApiError:
logger.warning("Failed to fetch thread replies")

return Message.update_data(message_dict, save=False)

except KeyError:
logger.warning("Invalid message data")
return None

def _handle_slack_response(self, response, api_method):
"""Handle Slack API response and raise exception if needed."""
if not response["ok"]:
error_message = f"{api_method} API call failed"
logger.error(error_message)
self.stdout.write(self.style.ERROR(error_message))
46 changes: 46 additions & 0 deletions backend/apps/slack/migrations/0014_message.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Generated by Django 5.2.1 on 2025-05-28 05:57

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("slack", "0013_alter_conversation_total_members_count_and_more"),
]

operations = [
migrations.CreateModel(
name="Message",
fields=[
(
"id",
models.BigAutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
("nest_created_at", models.DateTimeField(auto_now_add=True)),
("nest_updated_at", models.DateTimeField(auto_now=True)),
("is_thread", models.BooleanField(default=False, verbose_name="Is Thread")),
(
"slack_message_id",
models.CharField(max_length=50, verbose_name="Slack Message ID"),
),
("text", models.TextField(blank=True, verbose_name="Message Text")),
("timestamp", models.DateTimeField(blank=True, verbose_name="Message Timestamp")),
(
"conversation",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="messages",
to="slack.conversation",
),
),
],
options={
"verbose_name_plural": "Messages",
"db_table": "slack_messages",
"unique_together": {("conversation", "slack_message_id")},
},
),
]
1 change: 1 addition & 0 deletions backend/apps/slack/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .conversation import Conversation
from .event import Event
from .member import Member
from .message import Message
from .workspace import Workspace
2 changes: 1 addition & 1 deletion backend/apps/slack/models/conversation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Slack app channel model."""
"""Slack app conversation model."""

from datetime import UTC, datetime

Expand Down
Loading