From cf4b0cc872c078c6dac4bdc55b65484dd3007224 Mon Sep 17 00:00:00 2001 From: JC-Chung <52159296+JC-Chung@users.noreply.github.com> Date: Fri, 15 Sep 2023 03:11:28 +0800 Subject: [PATCH 1/3] Update twitch.py If the display name contains Chinese, Japanese, or Korean characters, the author name will be incorrect. --- chat_downloader/sites/twitch.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/chat_downloader/sites/twitch.py b/chat_downloader/sites/twitch.py index 9cb2194..32186ca 100644 --- a/chat_downloader/sites/twitch.py +++ b/chat_downloader/sites/twitch.py @@ -1309,11 +1309,13 @@ def get_chat_by_clip_id(self, clip_id, params): ) _MESSAGE_REGEX = re.compile( - r'^@(.+?(?=\s+:)).*tmi\.twitch\.tv\s+(\S+)(?:[^#\r\n]+#)?\s(?:\S+)?(?:\s:([^\r\n]*))?', re.MULTILINE) + # r'^@(.+?(?=\s+:)).*tmi\.twitch\.tv\s+(\S+)(?:[^#\r\n]+#)?\s(?:\S+)?(?:\s:([^\r\n]*))?', re.MULTILINE) + r'^@(.+?(?=\s+:))\s+:(.*)?tmi\.twitch\.tv\s+(\S+)(?:[^#\r\n]+#)?\s(?:\S+)?(?:\s:([^\r\n]*))?', re.MULTILINE) # Groups: # 1. Tag info - # 2. Action type - # 3. Message + # 2. ?User name + # 3. Action type + # 4. 
Message _BADGE_KEYS = ('title', 'image1x', 'image2x', 'image4x', 'clickAction', 'clickURL') @@ -1416,7 +1418,7 @@ def _parse_irc_item(match): r.remap(info, TwitchChatDownloader._IRC_REMAPPING, keys[0], keys[1], keep_unknown_keys=True, replace_char_with_underscores='-') - message_match = match.group(3) + message_match = match.group(4) if message_match: info['message'] = remove_prefixes(message_match, '\u0001ACTION ') @@ -1443,16 +1445,20 @@ def _parse_irc_item(match): subscriber_badge['months'] = int_or_none( subscriber_badge_metadata['version'], 0) - author_display_name = info.get('author_display_name') - if author_display_name: - info['author_name'] = author_display_name.lower() + user_name_match = match.group(2) + if user_name_match: + info['author_name'] = user_name_match.split('!')[0] + + # author_display_name = info.get('author_display_name') + # if author_display_name: + # info['author_name'] = author_display_name.lower() in_reply_to = BaseChatDownloader._move_to_dict(info, 'in_reply_to') BaseChatDownloader._move_to_dict(in_reply_to, 'author') BaseChatDownloader._move_to_dict(info, 'author') - original_action_type = match.group(2) + original_action_type = match.group(3) if original_action_type: new_action_type = TwitchChatDownloader._ACTION_TYPE_REMAPPING.get( From d1c06751ee7323440c94eedf905738c04c8e9364 Mon Sep 17 00:00:00 2001 From: JC-Chung <52159296+JC-Chung@users.noreply.github.com> Date: Tue, 24 Sep 2024 10:52:05 +0800 Subject: [PATCH 2/3] Clean comment code --- chat_downloader/sites/twitch.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/chat_downloader/sites/twitch.py b/chat_downloader/sites/twitch.py index 32186ca..a08df89 100644 --- a/chat_downloader/sites/twitch.py +++ b/chat_downloader/sites/twitch.py @@ -1309,7 +1309,6 @@ def get_chat_by_clip_id(self, clip_id, params): ) _MESSAGE_REGEX = re.compile( - # r'^@(.+?(?=\s+:)).*tmi\.twitch\.tv\s+(\S+)(?:[^#\r\n]+#)?\s(?:\S+)?(?:\s:([^\r\n]*))?', re.MULTILINE) 
r'^@(.+?(?=\s+:))\s+:(.*)?tmi\.twitch\.tv\s+(\S+)(?:[^#\r\n]+#)?\s(?:\S+)?(?:\s:([^\r\n]*))?', re.MULTILINE) # Groups: # 1. Tag info @@ -1449,10 +1448,6 @@ def _parse_irc_item(match): if user_name_match: info['author_name'] = user_name_match.split('!')[0] - # author_display_name = info.get('author_display_name') - # if author_display_name: - # info['author_name'] = author_display_name.lower() - in_reply_to = BaseChatDownloader._move_to_dict(info, 'in_reply_to') BaseChatDownloader._move_to_dict(in_reply_to, 'author') From 732360626c40fcc2af8ff66dff9df28469445ffd Mon Sep 17 00:00:00 2001 From: JC-Chung <52159296+JC-Chung@users.noreply.github.com> Date: Tue, 24 Sep 2024 10:58:41 +0800 Subject: [PATCH 3/3] Retention of the original mode --- chat_downloader/sites/twitch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chat_downloader/sites/twitch.py b/chat_downloader/sites/twitch.py index a08df89..334d246 100644 --- a/chat_downloader/sites/twitch.py +++ b/chat_downloader/sites/twitch.py @@ -1447,6 +1447,8 @@ def _parse_irc_item(match): user_name_match = match.group(2) if user_name_match: info['author_name'] = user_name_match.split('!')[0] + elif info.get('author_display_name'): + info['author_name'] = info['author_display_name'].lower() in_reply_to = BaseChatDownloader._move_to_dict(info, 'in_reply_to')