-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpostprocessing.py
345 lines (319 loc) · 14.1 KB
/
postprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import configparser
import html
import json
import logging
import re
from itertools import dropwhile
from pathlib import Path

import click
from markdown_it.renderer import RendererHTML
from myst_parser.config.main import MdParserConfig
from myst_parser.parsers.mdit import create_md_parser
# Module-level setup: everything below runs as a side effect of importing this file.
# Install the default root logging handler and log this module at INFO and above.
logging.basicConfig()
logger=logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Load settings from the INI file. The path is relative to the current working
# directory, and ConfigParser.read() silently skips files it cannot open, so a
# wrong working directory only shows up later as a KeyError on config['Constants'].
config = configparser.ConfigParser()
config.read('course_utils/config.ini')
# Prints the names of the sections that were loaded (always includes 'DEFAULT').
# NOTE(review): looks like a debugging aid -- confirm whether it should go through the logger instead.
print(list(config.keys()))
# Matches the opening line of a MyST/Sphinx directive fenced with four backticks,
# e.g. "````{admonition} Title\n". Group 1 is the directive name, group 2 is the
# (possibly empty) text that follows it on the same line.
# Raw string: "\w" and "\s" are regex escapes, not valid Python string escapes.
DIRECTIVE_PATTERN = re.compile(r'````{(\w+)}\s?(.*)\n')
# Number of backticks in the fence that opens/closes a whole-cell directive
DIRECTIVE_BACKTICKS = 4
# Mapping from Sphinx directives to optional literal text for Markdown headings
# (None means the directive gets no literal heading text)
DIRECTIVE_MAPPING = {
    'image': None,
    'admonition': None,
    'hint': 'Hint',
    'toggle': None,
}
# Markdown heading level (number of '#') used for headings that replace directives
HEADING_SUB_LEVEL = 4
# Cells with tags in this list will be removed
TAGS_TO_REMOVE = {'instructor', 'remove-cell'}
# Pattern for the Sphinx glossary role, e.g. {term}`data frame`;
# group 1 is the term (ASCII letters and spaces only)
TERM_PATTERN = re.compile(r'{term}`([A-Za-z ]+)`')
# Markdown footnote reference, e.g. [^1]
FOOTNOTE_PATTERN = re.compile(r'\[\^\d+\]')
# Markdown footnote definition anchor, e.g. [^1]:
FOOTNOTE_ANCHOR_PAT = re.compile(r'\[\^\d+\]:')
# Base URLs read from the [Constants] section of the config file.
# GLOSSARY_URL is prepended to the hyphenated term when {term} roles are expanded into links;
# IMAGE_URL replaces a leading "./" in the path of an inlined image directive.
GLOSSARY_URL = config['Constants']['GLOSSARY_URL']
IMAGE_URL = config['Constants']['IMAGE_URL']
# HTML for hint directives (dropdowns): a collapsible <details> element.
# Filled in with str.format(title=..., content=...).
HINT_HTML = '''<details>
<summary>{title}</summary>
{content}
</details>
'''
# HTML for hidden code: a collapsible <details> element wrapping a <pre><code> block.
# Filled in with str.format(code=...).
CODE_HTML = '''<details>
<summary>Click for a Solution</summary>
<pre><code>
{code}
</code></pre>
</details>
'''
# HTML to style hidden cells in NB classic. Used verbatim as the source of a code cell
# (hence the leading %%html cell magic); it is not passed through str.format, so the
# braces below are literal CSS.
MD_STYLE = '''%%html
<style>
details {
border: 1px solid; border-radius: 4px; padding: 0.5em 0.5em 0; }
summary {
font-weight: bold; margin: -0.5em -0.5em 0; padding: 0.5em; background-color: rgba(233,140,61,.2);}
details[open] {
padding: 0.5em; }
details[open] summary {
border-bottom: 1px solid; margin-bottom: 0.5em; background-color: rgba(66,129,81, .2); }
details li {
margin: 10px 0;
}
</style>
'''
class Notebook:
def __init__(self, nb_file):
'''
Class to hold an instance of Jupyter Notebook (JSON) for postprocessing.
:param nb_file: str or Path to an ipynb file.
'''
self.nb_json = self.load_nb(nb_file)
self.data = self.nb_json['cells']
# Markdown parser for inner content when creating HTML directly
self.md = create_md_parser(MdParserConfig(), RendererHTML)
def __iter__(self):
# implements iteration protocol
self.index = -1
return self
def __next__(self):
# iterates through notebook cells
if self.index < len(self.data) - 1:
self.index += 1
return self
else:
raise StopIteration
def hide_tags(self):
''''
Hides the Tags cell toolbar by default, to prevent users from accidentally deleting or modifying tags.
'''
if 'celltoolbar' in self.nb_json['metadata']:
del self.nb_json['metadata']['celltoolbar']
return self
def clear_outputs(self):
'''
Clears output on cells -- assuming the student notebooks should be clean of all outputs.
'''
if self.data[self.index]['cell_type'] == 'code':
self.data[self.index]['outputs'] = []
return self
def make_glossary_links(self):
'''
Replaces instances of the {term} directive in notebook Markdown with links to the term in the glossary.
'''
def term_expand(match_obj):
'''
Function applied in re.sub to replace every instance of the {term} directive in a string with its expanded form as a Markdown link
'''
term = match_obj.group(1)
return f'[{term}]({GLOSSARY_URL}{term.replace(" ", "-")})'
if self.data[self.index]['cell_type'] == 'markdown':
for i, line in enumerate(self.data[self.index]['source']):
# Iterate over matches in line
new_line = re.sub(TERM_PATTERN, term_expand, line)
self.data[self.index]['source'][i] = new_line
return self
def ensure_hidden(self):
'''
Ensures that cells using the Exercise2 Jupyter Notebook extension are hidden by default.
'''
if 'solution2' in self.data[self.index]['metadata']:
self.data[self.index]['metadata']['solution2'] = 'hidden'
return self
def apply_hidden(self):
'''
Toggles the visibility of cells with the hide-cell tag.
'''
if 'hide-cell' in self.data[self.index]['metadata'].get('tags', []):
self.data[self.index]['metadata']['jupyter'] = {'source_hidden': True}
if self.data[self.index]['cell_type'] == 'code':
# Add comment that will be visible on toggled cell
self.data[self.index]['source'].insert(0, '#Click to see the solution.\n')
return self
def hide_for_classic(self, cell):
'''
Replaces a hidden/collapsed code cell with an HTML/Markdown cell to hide the content on the "Classic" interface
'''
cell_source = cell['source']
# Remove leading comment
if cell_source[0].startswith('#'):
cell_source.pop(0)
cell_source = CODE_HTML.format(code=''.join(cell_source))
# Change cell type
cell['cell_type'] = 'markdown'
# Remove code-specific metadata
del cell['execution_count']
del cell['outputs']
cell['source'] = cell_source
# Unhide the cell in Notebook 7
if 'jupyter' in cell['metadata']:
cell['metadata']['jupyter']['source_hidden'] = False
return cell
def strip_footnotes(self):
'''
Removes Markdown footnotes from a given cell
'''
if self.data[self.index]['cell_type'] == 'markdown':
for i, line in enumerate(self.data[self.index]['source']):
# Remove any lines starting with a footnote anchor
if FOOTNOTE_ANCHOR_PAT.match(line):
self.data[self.index]['source'][i] = ''
else:
# Delete footnote references from any other lines
new_line = re.sub(FOOTNOTE_PATTERN, '', line)
self.data[self.index]['source'][i] = new_line
return self
def inline_images(self):
'''
Converts MyST images directives to Markdown inline images, when they occur within larger blocks of Markdown.
'''
if self.data[self.index]['cell_type'] == 'markdown':
is_directive = False
image_url = ''
alt_text = ''
lines = []
for i, line in enumerate(self.data[self.index]['source']):
if not is_directive and (line.startswith('```{image}') or line.startswith('````{image}')):
is_directive = True
_, image_url = line.strip().split() # Assume URL separate from directive by a space
image_url = re.sub(r'^\./', f'{IMAGE_URL}/', image_url) # Replace initial relative path with absolute path
elif is_directive:
if line.startswith(':alt:'):
alt_text = line.replace(':alt: ', '') # Extract alt text
# End of directive
elif line == '```\n' or line == '````\n':
lines.append(f'\n')
is_directive = False
else:
lines.append(line)
self.data[self.index]['source'] = lines
return self
def remove_admonitions(self):
'''
Removes any cells with admonitions
'''
if self.data[self.index]['cell_type'] == 'markdown' and self.data[self.index]['source'] and (self.data[self.index]['source'][0].startswith('````{admonition}') or self.data[self.index]['source'][0].startswith('````{note}')):
self.data.pop(self.index)
# Backtrack for iteration
self.index = self.index - 1
return self
def myst_to_md(self):
'''
Replaces MyST directives in notebook's markdown cells with regular Markdown/rendered HTML, to facilitate use in environments lacking the jupyterlab_myst plugin.
'''
for i, cell in enumerate(self.nb_json['cells']):
# Check for hidden code cells and replace with HTML
if cell['cell_type'] == 'code' and ('hide-cell' in cell['metadata'].get('tags', [])):
self.nb_json['cells'][i] = self.hide_for_classic(cell)
continue
# Assumes the directive encloses the entire cell, excluding any blank initial lines
cell_content = list(dropwhile(lambda x: not x or x.isspace(), cell['source']))
m = DIRECTIVE_PATTERN.match(cell_content[0])
if not m:
continue
match m.groups():
# Custom admonition -- label provided in the text following the directive
case ('admonition', admon_type):
cell_content[0] = '#' * HEADING_SUB_LEVEL + f' {admon_type}\n'
# Image directive -- URL follows the directive
case ('image', image_url):
# Extract alt text if present
alt_tag = [c for c in cell_content if c.startswith(':alt:')]
alt_text = alt_tag[0].replace(':alt: ', '') if alt_tag else ''
cell_content = [f'']
# Other directive -- no label provided, but heading needed
case (directive, _) if DIRECTIVE_MAPPING.get(directive):
# Check for dropdowns
if cell_content[1] == ':class: dropdown\n':
cell_title = f'Click for a {DIRECTIVE_MAPPING.get(directive)}'
# Render the rest of the cell as HTML, removing blank lines first
inner_content = [c for c in cell_content if not c.startswith('`' * DIRECTIVE_BACKTICKS) and not c.startswith(':class:') and not c.isspace()]
# Remove blank lines from the resulting HTML
inner_content = self.md.render(''.join(inner_content))#.replace('\n\n', '')
cell_content = [HINT_HTML.format(title=cell_title, content=inner_content) ]
# No heading needed
case _:
cell_content.pop(0)
# Remove closing backticks and any class statements
cell_content = [c for c in cell_content if not c.startswith('`' * DIRECTIVE_BACKTICKS) and not c.startswith(':class:')]
self.nb_json['cells'][i]['source'] = cell_content
return self
def add_md_style(self):
'''
Adds an %%html block to the top of the notebook, allowing custom styles in classic/non-myst notebooks
'''
cell = {
"cell_type": "code",
"execution_count": 0,
"id": "md-style",
"metadata": {},
"outputs": [],
"source": [MD_STYLE]
}
self.data.insert(0, cell)
return self
def remove_tagged_cells(self, tags=TAGS_TO_REMOVE):
'''
:param tags: should be a Python set of tags. Any cells with any of these tags will be removed from the output notebook.
'''
self.data = [cell for cell in self.data
if not (tags & set(cell['metadata'].get('tags', [])))]
return self
def save_nb(self, nb_file):
'''
Saves notebook json at provided path
'''
self.nb_json['cells'] = self.data
with open(nb_file, 'w') as f:
json.dump(self.nb_json, f)
return self
@staticmethod
def load_nb(nb_file):
'''
Loads as JSON an ipynb file.
'''
with open(nb_file) as f:
return json.load(f)
@click.command()
@click.option('--nb-input', default='textbook/notebooks')
@click.option('--nb-output', default='textbook/_build/html/_sources/notebooks')
@click.option('--nb-output-md', default='textbook/_build/html/_sources/notebooks')
@click.option('--inline-images', is_flag=True, default=False)
@click.option('--remove-admonitions', is_flag=True, default=False)
@click.option('--render-md/--no-render', is_flag=True, default=True)
def main(nb_input, nb_output, nb_output_md, inline_images, remove_admonitions, render_md):
    '''
    Postprocesses notebooks and, optionally, writes a second copy with MyST directives converted to plain Markdown/HTML.

    All paths are resolved relative to the parent of the directory containing this script.

    :param nb_input: path for reading a notebook or directory containing notebooks (may be nested)
    :param nb_output: path where processed notebooks will be saved
    :param nb_output_md: path where the pure-Markdown notebooks will be saved
    :param inline_images: convert image directives inside Markdown cells to inline images
    :param remove_admonitions: drop cells that start with an admonition or note directive
    :param render_md: also write the pure-Markdown version of each notebook
    '''
    root = Path(__file__).parents[1]
    nb_input = root / Path(nb_input)
    nb_output = root / Path(nb_output)
    nb_output_md = root / Path(nb_output_md)

    if nb_input.is_file() and nb_input.suffix == '.ipynb':
        # Single notebook: an output path that is not itself an .ipynb file is
        # treated as a directory to write into
        if nb_output.suffix != '.ipynb':
            nb_output = nb_output / nb_input.name
        if nb_output_md.suffix != '.ipynb':
            nb_output_md = nb_output_md / f'{nb_input.stem}-md{nb_input.suffix}'
        nb_paths = [(nb_input, nb_output, nb_output_md)]
    else:
        # Create folder for storing pure-Markdown notebooks, if it doesn't exist
        nb_output_md.mkdir(parents=True, exist_ok=True)
        # Assumes output notebooks should follow the same directory structure as input notebooks, e.g., lessons and homework
        nb_paths = [(p,
                     nb_output / p.parent.name / p.name,
                     nb_output_md / p.parent.name / f'{p.stem}-md{p.suffix}')  # Add -md to end of non-MyST notebooks
                    for p in nb_input.rglob('*.ipynb')
                    if p.parent.name != '.ipynb_checkpoints']
        if not nb_paths:
            logger.warning('No notebooks found under %s.', nb_input)

    for in_, out, md_out in nb_paths:
        logger.info('Processing notebook %s; saving output to %s.', in_, out)
        # The per-notebook sub-directory may not exist yet
        out.parent.mkdir(parents=True, exist_ok=True)
        nb = Notebook(in_)
        nb.remove_tagged_cells()
        # Each iteration yields the notebook itself, positioned on the next cell
        for cell in nb:
            cell.make_glossary_links().strip_footnotes().apply_hidden().clear_outputs()
            if inline_images:
                cell.inline_images()
            # Must come last: it may delete the current cell
            if remove_admonitions:
                cell.remove_admonitions()
        nb.hide_tags().save_nb(out)
        if render_md:
            logger.info('Creating pure Markdown notebook at %s.', md_out)
            md_out.parent.mkdir(parents=True, exist_ok=True)
            nb.myst_to_md().add_md_style().save_nb(md_out)


if __name__ == '__main__':
    main()