diff --git a/course_utils/postprocessing.py b/course_utils/postprocessing.py index 9123d03..d2cd661 100644 --- a/course_utils/postprocessing.py +++ b/course_utils/postprocessing.py @@ -5,6 +5,9 @@ from pathlib import Path import logging from itertools import dropwhile +from markdown_it.renderer import RendererHTML +from myst_parser.config.main import MdParserConfig +from myst_parser.parsers.mdit import create_md_parser logging.basicConfig() logger=logging.getLogger(__name__) @@ -25,6 +28,37 @@ # Pattern for Sphinx glossary directive TERM_PATTERN = re.compile(r'{term}`([A-Za-z ]+)`') GLOSSARY_URL = 'https://gwu-libraries.github.io/python-camp/glossary.html#term-' +# HTML for hint directives (dropdowns) +HINT_HTML = '''
+ {title} + {content} +
+''' +# HTML for hidden code +CODE_HTML = '''
+Click for a Solution +

+{code}
+
+
+''' + +# HTML to style hidden cells in NB classic +MD_STYLE = '''%%html + +''' class Notebook: @@ -35,6 +69,8 @@ def __init__(self, nb_file): ''' self.nb_json = self.load_nb(nb_file) self.data = self.nb_json['cells'] + # Markdown parser for inner content when creating HTML directly + self.md = create_md_parser(MdParserConfig(), RendererHTML) def __iter__(self): # implements iteration protocol @@ -101,12 +137,33 @@ def apply_hidden(self): # Add comment that will be visible on toggled cell self.data[self.index]['source'].insert(0, '#Click to see the solution.\n') return self + + def hide_for_classic(self, cell): + ''' + Replaces a hidden/collapsed code cell with an HTML/Markdown cell to hide the content on the "Classic" interface + ''' + cell_source = cell['source'] + # Remove leading comment + if cell_source[0].startswith('#'): + cell_source.pop(0) + cell_source = CODE_HTML.format(code=''.join(cell_source)) + # Change cell type + cell['cell_type'] = 'markdown' + # Remove code-specific metadata + del cell['execution_count'] + del cell['outputs'] + cell['source'] = cell_source + return cell - def remove_directives(self): + def myst_to_md(self): ''' - Removes MyST directives from the notebook's markdown cells. Assumes such directives are enclosed in four backticks (as opposed to three for code blocks.) Leaves the inner Markdown intact. In the case of the {image} directives, replaces the content with a standard inline image reference. + Replaces MyST directives in notebook's markdown cells with regular Markdown/rendered HTML, to facilitate use in environments lacking the jupyterlab_myst plugin. ''' for i, cell in enumerate(self.nb_json['cells']): + # Check for hidden code cells and replace with HTML + if cell['cell_type'] == 'code' and ('hide-cell' in cell['metadata'].get('tags', [])): + self.nb_json['cells'][i] = self.hide_for_classic(cell) + continue # Assumes the directive encloses the entire cell, excluding any blank initial lines cell_content = list(dropwhile(lambda x: not x or x.isspace(), cell['source'])) m = DIRECTIVE_PATTERN.match(cell_content[0]) @@ -124,7 +181,14 @@ def remove_directives(self): cell_content = [f'![{alt_text}]({image_url})'] # Other directive -- no label provided, but heading needed case (directive, '') if DIRECTIVE_MAPPING.get(directive): - cell_content[0] = '#' * HEADING_SUB_LEVEL + f' {DIRECTIVE_MAPPING[directive]}\n' + # Check for dropdowns + if cell_content[1] == ':class: dropdown\n': + cell_title = f'Click for a {DIRECTIVE_MAPPING.get(directive)}' + # Render the rest of the cell as HTML, removing blank lines first + inner_content = [c for c in cell_content if not c.startswith('`' * DIRECTIVE_BACKTICKS) and not c.startswith(':class:') and not c.isspace()] + # Remove blank lines from the resulting HTML + inner_content = self.md.render(''.join(inner_content)).replace('\n', '') + cell_content = [HINT_HTML.format(title=cell_title, content=inner_content) ] # No heading needed case _: cell_content.pop(0) @@ -133,6 +197,21 @@ def remove_directives(self): self.nb_json['cells'][i]['source'] = cell_content return self + def add_md_style(self): + ''' + Adds an %%html block to the top of the notebook, allowing custom styles in classic/non-myst notebooks + ''' + cell = { + "cell_type": "code", + "execution_count": 0, + "id": "md-style", + "metadata": {}, + "outputs": [], + "source": [MD_STYLE] + } + self.data.insert(0, cell) + return self + def remove_tagged_cells(self, tags=TAGS_TO_REMOVE): ''' :param tags: should be a Python set of tags. Any cells with any of these tags will be removed from the output notebook. @@ -161,7 +240,8 @@ def load_nb(nb_file): @click.command() @click.option('--nb-input', default='textbook/notebooks') @click.option('--nb-output', default='textbook/_build/html/_sources/notebooks') -def main(nb_input, nb_output): +@click.option('--nb-output-md', default='textbook/_build/html/_sources/notebooks') +def main(nb_input, nb_output, nb_output_md): ''' :param nb_input: path for reading a notebook or directory containing notebooks (may be nested) :param nb_output: path where processed notebooks will be saved @@ -169,13 +249,20 @@ def main(nb_input, nb_output): root = Path(__file__).parents[1] nb_input = root / Path(nb_input) nb_output = root / Path(nb_output) + nb_output_md = root / Path(nb_output_md) + # Create folder for storing pure-Markdown notebooks, if it doesn't exist + if not nb_output_md.exists() and nb_output_md.is_dir: + nb_output_md.mkdir() if nb_input.is_file() and nb_input.suffix == '.ipynb': - nb_paths = [(nb_input, nb_output)] + nb_paths = [(nb_input, nb_output, nb_output_md)] else: glob = nb_input.rglob('*.ipynb') # Assumes output notebooks should follow the same directory structure as input notebooks, e.g., lessons and homework - nb_paths = [(p, nb_output / p.parts[-2] / p.name) for p in glob if p.parts[-2] != '.ipynb_checkpoints'] - for in_, out in nb_paths: + nb_paths = [(p, + nb_output / p.parts[-2] / p.name, + nb_output_md / p.parts[-2] / f'{p.stem}-md{p.suffix}') # Add -md to end of non-MyST notebooks + for p in glob if p.parts[-2] != '.ipynb_checkpoints'] + for in_, out, md_out in nb_paths: logger.info(f'Processing notebook {in_}; saving output to {out}.') nb = Notebook(in_) @@ -183,6 +270,8 @@ def main(nb_input, nb_output): for cell in nb: cell.make_glossary_links().apply_hidden().clear_outputs() nb.hide_tags().save_nb(out) + logger.info(f'Creating pure Markdown notebook at {md_out}.') + nb.myst_to_md().add_md_style().save_nb(md_out) if __name__ == '__main__': main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1fe49c6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,55 @@ +accessible-pygments==0.0.4 +alabaster==0.7.13 +Babel==2.12.1 +click==8.1.3 +docutils==0.18.1 +ghp-import==2.1.0 +greenlet==2.0.2 +guzzle-sphinx-theme==0.7.11 +imagesize==1.4.1 +importlib-metadata==6.7.0 +jupyter-book==0.15.1 +jupyter-cache==0.6.1 +jupyter-contrib-core==0.4.2 +jupyter-highlight-selected-word==0.2.0 +jupyter-nbextensions-configurator==0.6.3 +jupyterlab_myst==2.0.1 +jupytext==1.14.7 +latexcodec==2.0.1 +linkify-it-py==2.0.2 +lxml==4.9.3 +markdown-it-py==2.2.0 +mdit-py-plugins==0.3.5 +mdurl==0.1.2 +myst-nb==0.17.2 +myst-parser==0.18.1 +nbclient==0.7.4 +pybtex==0.24.0 +pybtex-docutils==1.0.2 +pydata-sphinx-theme==0.13.3 +snowballstemmer==2.2.0 +Sphinx==5.0.2 +sphinx-book-theme==1.0.1 +sphinx-comments==0.0.3 +sphinx-copybutton==0.5.2 +sphinx-jupyterbook-latex==0.5.2 +sphinx-multitoc-numbering==0.1.3 +sphinx-thebe==0.2.1 +sphinx-togglebutton==0.3.2 +sphinx_design==0.3.0 +sphinx_external_toc==0.3.1 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-bibtex==2.5.0 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.9 +SQLAlchemy==2.0.17 +tabulate==0.9.0 +toml==0.10.2 +tqdm==4.65.0 +types-PyYAML==6.0.12.11 +typing_extensions==4.7.1 +uc-micro-py==1.0.2 +zipp==3.15.0 diff --git a/tests/test_postprocessing.ipynb b/tests/test_postprocessing.ipynb index edc3546..8958061 100644 --- a/tests/test_postprocessing.ipynb +++ b/tests/test_postprocessing.ipynb @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "e41339fc-9bff-4560-b682-9169405f8199", "metadata": { "editable": true, @@ -61,9 +61,18 @@ "hide-cell" ] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This cell should be hidden\n" + ] + } + ], "source": [ - "print(\"This cell should be hidden\")" + "msg = 'This cell should be hidden'\n", + "print(msg)" ] }, { @@ -96,6 +105,46 @@ "#This cell should be removed.\n", "assert False, 'This cell should have been removed.'" ] + }, + { + "cell_type": "markdown", + "id": "19b97cfb-9a72-408e-9e4f-d347574155ec", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "````{admonition} Try it out!\n", + ":class: try-it-out\n", + "\n", + "Use `split()` on the `term` variable (defined above) and compare the output with that of `course.split()`. \n", + "\n", + "Can you tell how `split()` works? How does it know where to separate the string?\n", + "\n", + "````" + ] + }, + { + "cell_type": "markdown", + "id": "26ec2a29-bccd-4908-b237-0510ad54fac9", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "````{hint}\n", + ":class: dropdown\n", + "- Try changing the text between quotation marks to see how the output varies.\n", + "- The name (left side of the equals sign) acts like a label for the value (right side of the equal sign -- here, the content between quotation marks).\n", + "- What do you think `print` does when it is given the name `my_workshop`? \n", + "````" + ] } ], "metadata": { diff --git a/tests/test_postprocessing.py b/tests/test_postprocessing.py index 1640e39..9e9d23d 100644 --- a/tests/test_postprocessing.py +++ b/tests/test_postprocessing.py @@ -24,3 +24,18 @@ def testProcessing(self): cells_for_removal = [cell for cell in self.notebook.data if 'remove-cell' in cell['metadata'].get('tags', [])] self.assertEqual(cells_for_removal, [], 'cells tagged for removal not removed') + def testMyST2oMD(self): + + self.notebook.myst_to_md() + self.assertEqual(self.notebook.data[3]['source'], + ["#### Try it out!\n", "\n", "Use `split()` on the `term` variable (defined above) and compare the output with that of `course.split()`. \n", "\n", "Can you tell how `split()` works? How does it know where to separate the string?\n", "\n"], + 'pure Markdown not created as expected') + self.assertEqual(self.notebook.data[4]['source'], + ["
\n Click for a Hint\n \n
\n"], + 'HTML for hints not created as expected' + ) + self.assertEqual(self.notebook.data[2]['source'], + "
\nClick for a Solution\n
\nmsg = 'This cell should be hidden'\nprint(msg)\n
\n
\n", + 'HTML for code solutions not created as expected') + self.notebook.add_md_style() + assert self.notebook.data[0]['source'][0].startswith(r'%%html'), 'expected HTML magic in first cell' \ No newline at end of file