Skip to content

Commit e6adb4b

Browse files
committed
[sites:zoom] Add support for Zoom.us (Closes #159)
1 parent 2c92ed5 commit e6adb4b

File tree

5 files changed

+176
-1
lines changed

5 files changed

+176
-1
lines changed

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ Supported sites:
182182
- YouTube.com - Livestreams, past broadcasts and premieres.
183183
- Twitch.tv - Livestreams, past broadcasts and clips.
184184
- Reddit.com - Livestreams, past broadcasts
185+
- Zoom.us - Past broadcasts
185186
- Facebook.com (currently in development) - Livestreams and past
186187
broadcasts.
187188

chat_downloader/sites/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
from .youtube import YouTubeChatDownloader
44
from .twitch import TwitchChatDownloader
5-
# from .facebook import FacebookChatDownloader
65
from .reddit import RedditChatDownloader
6+
from .zoom import ZoomChatDownloader
77
from .common import BaseChatDownloader
88

99

chat_downloader/sites/zoom.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
2+
3+
import json
4+
import re
5+
from .common import (
6+
BaseChatDownloader,
7+
Chat,
8+
Remapper as r
9+
)
10+
from ..utils.core import (
11+
time_to_seconds,
12+
regex_search,
13+
ensure_seconds
14+
)
15+
from ..errors import (
16+
SiteError,
17+
ParsingError
18+
)
19+
20+
# TODO add debugging options
21+
# from ..debugging import (
22+
# log,
23+
# debug_log
24+
# )
25+
26+
27+
class ZoomError(SiteError):
    """Site-specific error raised when something goes wrong with a Zoom video."""
30+
31+
32+
class ZoomChatDownloader(BaseChatDownloader):
33+
_NAME = 'zoom.us'
34+
35+
_ZOOM_HOMEPAGE = 'https://zoom.us/'
36+
_ZOOM_PATH_TEMPLATE = 'rec/play/{id}'
37+
38+
_INITIAL_INFO_REGEX = r'(?s)window\.__data__\s*=\s*({.+?});'
39+
_CHAT_MESSAGES_REGEX = r'window\.__data__\.chatList\.push\((\{[\s\S]+?\})\)'
40+
41+
_SITE_DEFAULT_PARAMS = {
42+
'format': 'default', # TODO create zoom format
43+
}
44+
45+
_REMAPPING = {
46+
'username': 'author_name',
47+
'time': 'time_text',
48+
'content': 'message',
49+
}
50+
51+
_TESTS = [
52+
{
53+
'name': 'Get chat messages from past broadcast #1',
54+
'params': {
55+
'url': 'https://zoom.us/rec/play/6ccrIuigqG83GIaT4wSDAv59W9W5J_-s1HUe_6UPykq3V3hVN1emMucTYLEJiA87rIkEPcGptB0Dp_dH',
56+
'max_messages': 10
57+
},
58+
'expected_result': {
59+
'messages_condition': lambda messages: len(messages) > 0,
60+
}
61+
},
62+
{
63+
'name': 'Get chat messages from past broadcast #2',
64+
'params': {
65+
'url': 'https://zoom.us/rec/play/65V5deGq-Do3T9bHuASDAv4tW420f_ms1iIb-vIKzEqzUiEFNFWiYONAN-vRvNmKnlg6z95Y4mNQ9QJQ',
66+
'max_messages': 10
67+
},
68+
'expected_result': {
69+
'messages_condition': lambda messages: len(messages) > 0,
70+
}
71+
},
72+
{
73+
'name': 'Get chat messages from past broadcast #3',
74+
'params': {
75+
'url': 'https://zoom.us/rec/play/75Usc7j8rjg3E92S4gSDAf95W9S9K6-sg3dP_voImR60WiEHYVSmYrsbNwNE1_6-jwlwLx5cg1IeyjM',
76+
'max_messages': 10
77+
},
78+
'expected_result': {
79+
'messages_condition': lambda messages: len(messages) > 0,
80+
}
81+
},
82+
{
83+
'name': 'Invalid video',
84+
'params': {
85+
'url': 'https://zoom.us/rec/play/invalid',
86+
},
87+
'expected_result': {
88+
'error': ZoomError
89+
}
90+
},
91+
]
92+
93+
# Regex provided by youtube-dl. Maps the name of the handler method to the
# URL pattern it accepts. The named groups `base_url` and `id` are read by
# that handler.
# The dot in "zoom.us" is escaped so that it only matches a literal dot;
# unescaped, hosts such as "zoomXus" would also have been accepted.
_VALID_URLS = {
    '_get_chat_by_video_id': r'(?P<base_url>https?://(?:[^.]+\.)?zoom\.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)',
}
97+
_ERROR_MESSAGE_REGEX = r'<span class="error-message">\s*([^<]+?)\s*<\/span>'
98+
99+
def _get_chat_by_video_id(self, match, params):
    """Dispatch a matched recording URL to `get_chat_by_video_id`.

    :param match: Regex match object exposing the named groups
        `id` and `base_url`
    :param params: Download parameters, forwarded unchanged
    :return: Whatever `get_chat_by_video_id` returns
    """
    video_id, site_root = match.group('id', 'base_url')
    return self.get_chat_by_video_id(video_id, params, base_url=site_root)
103+
104+
def get_chat_by_video_id(self, video_id, params, base_url=_ZOOM_HOMEPAGE):
    """Fetch a Zoom recording page and build a `Chat` from its contents.

    :param video_id: Identifier inserted into the recording path template
    :param params: Download parameters, passed on to the message generator
    :param base_url: Site root the recording path is appended to
    :raises ZoomError: If the page has no initial data but does contain an
        error message (only the first line of that message is used)
    :raises ParsingError: If neither initial data nor an error message
        can be found on the page
    :return: A `Chat` wrapping the message generator, with title, video
        type, start time and id taken from the page's initial data
    """
    recording_url = base_url + self._ZOOM_PATH_TEMPLATE.format(id=video_id)
    html = self._session_get(recording_url).text

    raw_info = regex_search(html, self._INITIAL_INFO_REGEX)

    # No initial data on the page: report the site's own error if present.
    if raw_info is None:
        reason = regex_search(html, self._ERROR_MESSAGE_REGEX)
        if not reason:
            raise ParsingError('Error parsing video')
        raise ZoomError(reason.split('\n')[0])

    info = self._parse_js_dict(raw_info)

    if info.get('isVideo'):
        kind = 'video'
    else:
        kind = 'not_video'

    messages = self._get_chat_messages(html, params)
    return Chat(
        messages,

        title=info.get('topic'),
        video_type=kind,
        start_time=info.get('fileStartTime'),
        id=info.get('recordingId'),
    )
130+
131+
def _parse_js_dict(self, json_string):
    """Parse a simple JavaScript object literal into a Python object.

    The input is rewritten into JSON and handed to `json.loads`:
    bare keys at the start of a line are wrapped in double quotes, and
    single-quoted string values are converted to double-quoted ones.

    :param json_string: JavaScript object literal, one `key: value`
        pair per line with the key at the start of the line
    :raises json.JSONDecodeError: If the rewritten text is still not
        valid JSON
    :return: The decoded object
    """
    # Quote bare keys: `key: value` -> `"key": value`
    result = re.sub(r"^([^:\s]+):\s+", r'"\g<1>": ',
                    json_string, flags=re.MULTILINE)

    # `\'` is not a valid JSON escape; a plain apostrophe is fine once
    # the value is delimited by double quotes.
    result = result.replace(r"\'", "'")

    def _requote(match):
        # Double quotes inside the value must be escaped, otherwise they
        # would terminate the JSON string early. Quotes that are already
        # preceded by a backslash are left alone.
        value = re.sub(r'(?<!\\)"', r'\\"', match.group(1))
        return ': "' + value + '"'

    # Convert single-quoted values: `: '...'` -> `: "..."`
    result = re.sub(r":\s+'(.*)'", _requote, result, flags=re.MULTILINE)
    return json.loads(result)
138+
139+
def _get_chat_messages(self, page_data, params):
    """Yield the chat messages embedded in a recording page.

    :param page_data: Text of the recording page
    :param params: Download parameters; `start_time` and `end_time` are
        read and passed through `ensure_seconds` (with fallbacks of 0
        and infinity respectively)
    :return: Generator of message dictionaries
    """
    lower_bound = ensure_seconds(params.get('start_time'), 0)
    upper_bound = ensure_seconds(params.get('end_time'), float('inf'))

    for raw_message in re.findall(self._CHAT_MESSAGES_REGEX, page_data):
        parsed = self._parse_js_dict(raw_message)
        message = r.remap_dict(parsed, self._REMAPPING)

        # Process time information
        offset = time_to_seconds(message['time_text'])
        message['time_in_seconds'] = offset

        # Too early: skip this message but keep looking.
        if offset < lower_bound:
            continue

        # Past the requested window: stop iterating altogether.
        if offset > upper_bound:
            return

        # NOTE(review): presumably groups the `author_*` keys under an
        # 'author' entry - confirm against BaseChatDownloader.
        BaseChatDownloader._move_to_dict(message, 'author')
        yield message

docs/README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ Supported sites:
163163
- YouTube.com - Livestreams, past broadcasts and premieres.
164164
- Twitch.tv - Livestreams, past broadcasts and clips.
165165
- Reddit.com - Livestreams, past broadcasts
166+
- Zoom.us - Past broadcasts
166167
- Facebook.com (currently in development) - Livestreams and past
167168
broadcasts.
168169

docs/source/sites.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,23 @@
2323
:undoc-members:
2424
:show-inheritance:
2525

26+
:py:class:`RedditChatDownloader`
27+
********************************
28+
29+
.. autoclass:: chat_downloader.sites.RedditChatDownloader
30+
:members:
31+
:undoc-members:
32+
:show-inheritance:
33+
34+
35+
:py:class:`ZoomChatDownloader`
36+
********************************
37+
38+
.. autoclass:: chat_downloader.sites.ZoomChatDownloader
39+
:members:
40+
:undoc-members:
41+
:show-inheritance:
42+
2643

2744
:py:class:`BaseChatDownloader`
2845
******************************

0 commit comments

Comments
 (0)