-
-
Notifications
You must be signed in to change notification settings - Fork 178
Gathering slack messages from conversations #1513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 17 commits
Commits
Show all changes
34 commits
Select commit
Hold shift + click to select a range
96305ff
message model and message fetching
Dishant1804 ddfeb01
checks and lints
Dishant1804 bdd2e59
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 18fce4e
tests and command fix
Dishant1804 86b09a2
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 c731b5f
spellin fixed
Dishant1804 fed1d2d
Merge branch 'main' into message_model
Dishant1804 243c2d7
Merge branch 'main' into message_model
Dishant1804 d06a386
Merge branch 'main' into message_model
Dishant1804 1c4a5ff
Merge branch 'main' into message_model
Dishant1804 4ab44a3
message model with updated fetch
Dishant1804 7a0e239
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 ebc9009
changes and code rabbit suggestions
Dishant1804 6b2554c
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 9c737e4
Merge branch 'main' into message_model
Dishant1804 0d67c1f
Merge branch 'main' into message_model
Dishant1804 2182427
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 9ba5e9f
Update code
arkid15r 0f9dd70
suggestions implemented
Dishant1804 02db4ff
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 a530ae5
chunk model along with embeddings
Dishant1804 93e1a5b
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 46076df
pre commit
Dishant1804 4cb8d97
cspell checks
Dishant1804 532b104
code rabbit suggestions
Dishant1804 fdf7936
Merge branch 'main' into message_model
Dishant1804 2bed7e0
removed files related to chunking
Dishant1804 78f6200
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 f74e546
suggestions implemented
Dishant1804 e3381c1
Merge remote-tracking branch 'upstream/main' into message_model
Dishant1804 6d5322f
code rabbit suggestions
Dishant1804 4cb6068
Run make update
arkid15r f1b573f
Update code
arkid15r e60be10
Merge branch 'main' into message_model
arkid15r File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
215 changes: 215 additions & 0 deletions
215
backend/apps/slack/management/commands/slack_sync_messages.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
"""A command to populate Slack messages data for all conversations.""" | ||
|
||
import logging | ||
import time | ||
|
||
from django.core.management.base import BaseCommand | ||
from slack_sdk import WebClient | ||
from slack_sdk.errors import SlackApiError | ||
|
||
from apps.slack.models import Conversation, Message, Workspace | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand): | ||
help = "Populate messages for all Slack conversations" | ||
|
||
def add_arguments(self, parser): | ||
"""Define command line arguments.""" | ||
parser.add_argument( | ||
"--batch-size", | ||
type=int, | ||
default=200, | ||
help="Number of messages to retrieve per request", | ||
) | ||
parser.add_argument( | ||
"--delay", | ||
type=float, | ||
default=0.5, | ||
help="Delay between API requests in seconds", | ||
) | ||
parser.add_argument( | ||
"--channel-id", | ||
type=str, | ||
help="Specific channel ID to fetch messages from", | ||
) | ||
|
||
def handle(self, *args, **options): | ||
batch_size = options["batch_size"] | ||
delay = options["delay"] | ||
channel_id = options["channel_id"] | ||
include_threads = True | ||
|
||
workspaces = Workspace.objects.all() | ||
if not workspaces.exists(): | ||
self.stdout.write(self.style.WARNING("No workspaces found in the database")) | ||
return | ||
|
||
for workspace in workspaces: | ||
self.stdout.write(f"\nProcessing workspace: {workspace.name}") | ||
|
||
if not (bot_token := workspace.bot_token): | ||
self.stdout.write(self.style.ERROR(f"No bot token found for {workspace}")) | ||
continue | ||
|
||
client = WebClient(token=bot_token) | ||
|
||
conversations = ( | ||
[Conversation.objects.get(slack_channel_id=channel_id)] | ||
if channel_id | ||
else Conversation.objects.filter(workspace=workspace) | ||
) | ||
|
||
for conversation in conversations: | ||
self._fetch_messages_for_conversation( | ||
client=client, | ||
conversation=conversation, | ||
batch_size=batch_size, | ||
delay=delay, | ||
include_threads=include_threads, | ||
) | ||
|
||
self.stdout.write(self.style.SUCCESS("\nFinished processing all workspaces")) | ||
|
||
def _fetch_messages_for_conversation( | ||
self, | ||
client: WebClient, | ||
conversation: Conversation, | ||
batch_size: int, | ||
delay: float, | ||
*, | ||
include_threads: bool, | ||
): | ||
"""Fetch messages for a single conversation from its beginning.""" | ||
self.stdout.write(f"\nProcessing channel: {conversation.name}") | ||
|
||
try: | ||
last_message = ( | ||
Message.objects.filter(conversation=conversation).order_by("-timestamp").first() | ||
) | ||
oldest = ( | ||
str(last_message.timestamp.timestamp()) | ||
if last_message and last_message.timestamp | ||
else None | ||
) | ||
|
||
cursor = None | ||
has_more = True | ||
|
||
while has_more: | ||
response = client.conversations_history( | ||
channel=conversation.slack_channel_id, | ||
cursor=cursor, | ||
limit=batch_size, | ||
oldest=oldest, | ||
) | ||
self._handle_slack_response(response, "conversations_history") | ||
messages_data = response.get("messages", []) | ||
|
||
messages = [ | ||
message | ||
for message_data in messages_data | ||
if ( | ||
message := self._create_message_from_data( | ||
Dishant1804 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
client, message_data, conversation, include_threads=include_threads | ||
) | ||
) | ||
] | ||
|
||
messages_count = len(messages) | ||
if messages: | ||
Message.bulk_save(messages) | ||
self.stdout.write(f"Saved {messages_count} messages") | ||
|
||
cursor = response.get("response_metadata", {}).get("next_cursor") | ||
has_more = bool(cursor) | ||
|
||
if delay and has_more: | ||
time.sleep(delay) | ||
|
||
self.stdout.write( | ||
self.style.SUCCESS(f"Finished processing messages from {conversation.name}") | ||
) | ||
|
||
except SlackApiError as e: | ||
self.stdout.write( | ||
self.style.ERROR( | ||
f"Failed to fetch messages for {conversation.name}: {e.response['error']}" | ||
) | ||
) | ||
|
||
def _create_message_from_data( | ||
Dishant1804 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self, | ||
client: WebClient, | ||
Dishant1804 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
message_data: dict, | ||
conversation: Conversation, | ||
*, | ||
include_threads: bool, | ||
) -> Message | None: | ||
"""Create Message instance from Slack API data.""" | ||
try: | ||
if message_data.get("subtype") in ["channel_join", "channel_leave"]: | ||
return None | ||
|
||
if ( | ||
not message_data.get("text") | ||
and not message_data.get("attachments") | ||
and not message_data.get("files") | ||
): | ||
return None | ||
|
||
message_ts = message_data.get("ts") | ||
thread_ts = message_data.get("thread_ts") | ||
|
||
is_actual_thread_parent = message_data.get("reply_count", 0) > 0 | ||
is_reply_in_thread = thread_ts is not None and thread_ts != message_ts | ||
|
||
if include_threads and is_reply_in_thread: | ||
return None | ||
Dishant1804 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
message_dict = { | ||
"slack_message_id": message_ts, | ||
"conversation": conversation, | ||
"text": message_data.get("text", ""), | ||
"timestamp": message_ts, | ||
"is_thread": is_actual_thread_parent, | ||
} | ||
|
||
if include_threads and is_actual_thread_parent: | ||
try: | ||
thread_response = client.conversations_replies( | ||
Dishant1804 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
channel=conversation.slack_channel_id, | ||
ts=message_ts, | ||
) | ||
self._handle_slack_response(thread_response, "conversations_replies") | ||
|
||
if thread_response.get("ok"): | ||
api_thread_messages = thread_response.get("messages", []) | ||
current_parent_text = message_dict.get("text", "") | ||
combined_text_list = [current_parent_text] | ||
|
||
reply_texts = [ | ||
reply_msg_data.get("text", "") | ||
for reply_msg_data in api_thread_messages[1:] | ||
if reply_msg_data.get("text", "") | ||
] | ||
combined_text_list.extend(reply_texts) | ||
|
||
message_dict["text"] = "\n\n".join(filter(None, combined_text_list)) | ||
|
||
except SlackApiError: | ||
logger.warning("Failed to fetch thread replies") | ||
|
||
return Message.update_data(message_dict, save=False) | ||
|
||
except KeyError: | ||
Dishant1804 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
logger.warning("Invalid message data") | ||
return None | ||
|
||
def _handle_slack_response(self, response, api_method): | ||
"""Handle Slack API response and raise exception if needed.""" | ||
if not response["ok"]: | ||
error_message = f"{api_method} API call failed" | ||
logger.error(error_message) | ||
self.stdout.write(self.style.ERROR(error_message)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Generated by Django 5.2.1 on 2025-05-28 05:57 | ||
|
||
import django.db.models.deletion | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration): | ||
dependencies = [ | ||
("slack", "0013_alter_conversation_total_members_count_and_more"), | ||
] | ||
|
||
operations = [ | ||
migrations.CreateModel( | ||
name="Message", | ||
fields=[ | ||
( | ||
"id", | ||
models.BigAutoField( | ||
auto_created=True, primary_key=True, serialize=False, verbose_name="ID" | ||
), | ||
), | ||
("nest_created_at", models.DateTimeField(auto_now_add=True)), | ||
("nest_updated_at", models.DateTimeField(auto_now=True)), | ||
("is_thread", models.BooleanField(default=False, verbose_name="Is Thread")), | ||
( | ||
"slack_message_id", | ||
models.CharField(max_length=50, verbose_name="Slack Message ID"), | ||
), | ||
("text", models.TextField(blank=True, verbose_name="Message Text")), | ||
("timestamp", models.DateTimeField(blank=True, verbose_name="Message Timestamp")), | ||
( | ||
"conversation", | ||
models.ForeignKey( | ||
on_delete=django.db.models.deletion.CASCADE, | ||
related_name="messages", | ||
to="slack.conversation", | ||
), | ||
), | ||
], | ||
options={ | ||
"verbose_name_plural": "Messages", | ||
"db_table": "slack_messages", | ||
"unique_together": {("conversation", "slack_message_id")}, | ||
}, | ||
), | ||
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from .conversation import Conversation | ||
from .event import Event | ||
from .member import Member | ||
from .message import Message | ||
from .workspace import Workspace |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
"""Slack app channel model.""" | ||
"""Slack app conversation model.""" | ||
|
||
from datetime import UTC, datetime | ||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.