Skip to content

Commit

Permalink
Added requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
dolsysmith committed Aug 25, 2023
1 parent 40c966e commit 77c4a7c
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 10 deletions.
103 changes: 96 additions & 7 deletions course_utils/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from pathlib import Path
import logging
from itertools import dropwhile
from markdown_it.renderer import RendererHTML
from myst_parser.config.main import MdParserConfig
from myst_parser.parsers.mdit import create_md_parser

logging.basicConfig()
logger=logging.getLogger(__name__)
Expand All @@ -25,6 +28,37 @@
# Pattern for Sphinx glossary directive
TERM_PATTERN = re.compile(r'{term}`([A-Za-z ]+)`')
GLOSSARY_URL = 'https://gwu-libraries.github.io/python-camp/glossary.html#term-'
# HTML for hint directives (dropdowns)
HINT_HTML = '''<details>
<summary>{title}</summary>
{content}
</details>
'''
# HTML for hidden code
CODE_HTML = '''<details>
<summary>Click for a Solution</summary>
<pre><code>
{code}
</code></pre>
</details>
'''

# HTML to style hidden cells in NB classic
MD_STYLE = '''%%html
<style>
details {
border: 1px solid; border-radius: 4px; padding: 0.5em 0.5em 0; }
summary {
font-weight: bold; margin: -0.5em -0.5em 0; padding: 0.5em; background-color: rgba(233,140,61,.2);}
details[open] {
padding: 0.5em; }
details[open] summary {
border-bottom: 1px solid; margin-bottom: 0.5em; background-color: rgba(66,129,81, .2); }
details li {
margin: 10px 0;
}
</style>
'''

class Notebook:

Expand All @@ -35,6 +69,8 @@ def __init__(self, nb_file):
'''
self.nb_json = self.load_nb(nb_file)
self.data = self.nb_json['cells']
# Markdown parser for inner content when creating HTML directly
self.md = create_md_parser(MdParserConfig(), RendererHTML)

def __iter__(self):
# implements iteration protocol
Expand Down Expand Up @@ -101,12 +137,33 @@ def apply_hidden(self):
# Add comment that will be visible on toggled cell
self.data[self.index]['source'].insert(0, '#Click to see the solution.\n')
return self

def hide_for_classic(self, cell):
    '''
    Replaces a hidden/collapsed code cell with an HTML/Markdown cell to hide the content on the "Classic" interface.

    :param cell: a code-cell dictionary from the notebook JSON; it is modified in place
    :returns: the same cell dictionary, now of type "markdown", whose source is a single HTML string built from CODE_HTML
    '''
    # Local import keeps this method self-contained; html is part of the standard library
    import html

    source = cell['source']
    # The notebook format allows source to be either one string or a list of lines
    if isinstance(source, str):
        lines = source.splitlines(keepends=True)
    else:
        lines = list(source)
    # Remove leading comment (guarding against cells with no source at all)
    if lines and lines[0].startswith('#'):
        lines = lines[1:]
    # Escape &, < and > so the code displays literally inside <pre><code>; quotes are left untouched
    code = html.escape(''.join(lines), quote=False)
    # Change cell type
    cell['cell_type'] = 'markdown'
    # Remove code-specific metadata; tolerate cells that lack these keys
    cell.pop('execution_count', None)
    cell.pop('outputs', None)
    cell['source'] = CODE_HTML.format(code=code)
    return cell

def remove_directives(self):
def myst_to_md(self):
'''
Removes MyST directives from the notebook's markdown cells. Assumes such directives are enclosed in four backticks (as opposed to three for code blocks.) Leaves the inner Markdown intact. In the case of the {image} directives, replaces the content with a standard inline image reference.
Replaces MyST directives in notebook's markdown cells with regular Markdown/rendered HTML, to facilitate use in environments lacking the jupyterlab_myst plugin.
'''
for i, cell in enumerate(self.nb_json['cells']):
# Check for hidden code cells and replace with HTML
if cell['cell_type'] == 'code' and ('hide-cell' in cell['metadata'].get('tags', [])):
self.nb_json['cells'][i] = self.hide_for_classic(cell)
continue
# Assumes the directive encloses the entire cell, excluding any blank initial lines
cell_content = list(dropwhile(lambda x: not x or x.isspace(), cell['source']))
m = DIRECTIVE_PATTERN.match(cell_content[0])
Expand All @@ -124,7 +181,14 @@ def remove_directives(self):
cell_content = [f'![{alt_text}]({image_url})']
# Other directive -- no label provided, but heading needed
case (directive, '') if DIRECTIVE_MAPPING.get(directive):
cell_content[0] = '#' * HEADING_SUB_LEVEL + f' {DIRECTIVE_MAPPING[directive]}\n'
# Check for dropdowns
if cell_content[1] == ':class: dropdown\n':
cell_title = f'Click for a {DIRECTIVE_MAPPING.get(directive)}'
# Render the rest of the cell as HTML, removing blank lines first
inner_content = [c for c in cell_content if not c.startswith('`' * DIRECTIVE_BACKTICKS) and not c.startswith(':class:') and not c.isspace()]
# Remove blank lines from the resulting HTML
inner_content = self.md.render(''.join(inner_content)).replace('\n', '')
cell_content = [HINT_HTML.format(title=cell_title, content=inner_content) ]
# No heading needed
case _:
cell_content.pop(0)
Expand All @@ -133,6 +197,21 @@ def remove_directives(self):
self.nb_json['cells'][i]['source'] = cell_content
return self

def add_md_style(self):
    '''
    Puts a code cell containing the %%html style block (MD_STYLE) at the very top of the notebook, so that custom styles apply in classic/non-myst notebooks.

    :returns: self, so that calls can be chained
    '''
    # Keys are given in the same order in which they should appear in the saved cell
    style_cell = dict(
        cell_type="code",
        execution_count=0,
        id="md-style",
        metadata={},
        outputs=[],
        source=[MD_STYLE],
    )
    # Position 0 makes this the first cell of the notebook
    self.data.insert(0, style_cell)
    return self

def remove_tagged_cells(self, tags=TAGS_TO_REMOVE):
'''
:param tags: should be a Python set of tags. Any cells with any of these tags will be removed from the output notebook.
Expand Down Expand Up @@ -161,28 +240,38 @@ def load_nb(nb_file):
@click.command()
@click.option('--nb-input', default='textbook/notebooks')
@click.option('--nb-output', default='textbook/_build/html/_sources/notebooks')
@click.option('--nb-output-md', default='textbook/_build/html/_sources/notebooks')
def main(nb_input, nb_output, nb_output_md):
    '''
    Processes a single notebook or every notebook under a directory, saving two versions of each: the processed notebook and a "pure Markdown" copy in which MyST directives have been replaced.

    All three paths are resolved relative to the parent of the directory that contains this file.

    :param nb_input: path for reading a notebook or directory containing notebooks (may be nested)
    :param nb_output: path where processed notebooks will be saved
    :param nb_output_md: path where the pure-Markdown (non-MyST) notebooks will be saved; in directory mode these files get "-md" appended to their stem
    '''
    root = Path(__file__).parents[1]
    nb_input = root / Path(nb_input)
    nb_output = root / Path(nb_output)
    nb_output_md = root / Path(nb_output_md)
    if nb_input.is_file() and nb_input.suffix == '.ipynb':
        # Single notebook: both output paths are used exactly as given, so no directory is created at nb_output_md itself
        # NOTE(review): with the default options both outputs resolve to the same path in this mode, so the second save would replace the first -- confirm intended usage
        nb_paths = [(nb_input, nb_output, nb_output_md)]
    else:
        # Create folder for storing pure-Markdown notebooks, if it doesn't exist (including any missing parents)
        nb_output_md.mkdir(parents=True, exist_ok=True)
        notebooks = nb_input.rglob('*.ipynb')
        # Assumes output notebooks should follow the same directory structure as input notebooks, e.g., lessons and homework
        nb_paths = [(p,
                     nb_output / p.parts[-2] / p.name,
                     nb_output_md / p.parts[-2] / f'{p.stem}-md{p.suffix}')  # Add -md to end of non-MyST notebooks
                    for p in notebooks if p.parts[-2] != '.ipynb_checkpoints']
    for in_, out, md_out in nb_paths:
        logger.info(f'Processing notebook {in_}; saving output to {out}.')
        nb = Notebook(in_)
        nb.remove_tagged_cells()
        for cell in nb:
            cell.make_glossary_links().apply_hidden().clear_outputs()
        nb.hide_tags().save_nb(out)
        logger.info(f'Creating pure Markdown notebook at {md_out}.')
        # Make sure the folder that will hold the Markdown notebook exists before saving into it
        md_out.parent.mkdir(parents=True, exist_ok=True)
        nb.myst_to_md().add_md_style().save_nb(md_out)

if __name__ == '__main__':
main()
55 changes: 55 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
accessible-pygments==0.0.4
alabaster==0.7.13
Babel==2.12.1
click==8.1.3
docutils==0.18.1
ghp-import==2.1.0
greenlet==2.0.2
guzzle-sphinx-theme==0.7.11
imagesize==1.4.1
importlib-metadata==6.7.0
jupyter-book==0.15.1
jupyter-cache==0.6.1
jupyter-contrib-core==0.4.2
jupyter-highlight-selected-word==0.2.0
jupyter-nbextensions-configurator==0.6.3
jupyterlab_myst==2.0.1
jupytext==1.14.7
latexcodec==2.0.1
linkify-it-py==2.0.2
lxml==4.9.3
markdown-it-py==2.2.0
mdit-py-plugins==0.3.5
mdurl==0.1.2
myst-nb==0.17.2
myst-parser==0.18.1
nbclient==0.7.4
pybtex==0.24.0
pybtex-docutils==1.0.2
pydata-sphinx-theme==0.13.3
snowballstemmer==2.2.0
Sphinx==5.0.2
sphinx-book-theme==1.0.1
sphinx-comments==0.0.3
sphinx-copybutton==0.5.2
sphinx-jupyterbook-latex==0.5.2
sphinx-multitoc-numbering==0.1.3
sphinx-thebe==0.2.1
sphinx-togglebutton==0.3.2
sphinx_design==0.3.0
sphinx_external_toc==0.3.1
sphinxcontrib-applehelp==1.0.4
sphinxcontrib-bibtex==2.5.0
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==2.0.1
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.9
SQLAlchemy==2.0.17
tabulate==0.9.0
toml==0.10.2
tqdm==4.65.0
types-PyYAML==6.0.12.11
typing_extensions==4.7.1
uc-micro-py==1.0.2
zipp==3.15.0
55 changes: 52 additions & 3 deletions tests/test_postprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "e41339fc-9bff-4560-b682-9169405f8199",
"metadata": {
"editable": true,
Expand All @@ -61,9 +61,18 @@
"hide-cell"
]
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This cell should be hidden\n"
]
}
],
"source": [
"print(\"This cell should be hidden\")"
"msg = 'This cell should be hidden'\n",
"print(msg)"
]
},
{
Expand Down Expand Up @@ -96,6 +105,46 @@
"#This cell should be removed.\n",
"assert False, 'This cell should have been removed.'"
]
},
{
"cell_type": "markdown",
"id": "19b97cfb-9a72-408e-9e4f-d347574155ec",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"````{admonition} Try it out!\n",
":class: try-it-out\n",
"\n",
"Use `split()` on the `term` variable (defined above) and compare the output with that of `course.split()`. \n",
"\n",
"Can you tell how `split()` works? How does it know where to separate the string?\n",
"\n",
"````"
]
},
{
"cell_type": "markdown",
"id": "26ec2a29-bccd-4908-b237-0510ad54fac9",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"````{hint}\n",
":class: dropdown\n",
"- Try changing the text between quotation marks to see how the output varies.\n",
"- The name (left side of the equals sign) acts like a label for the value (right side of the equal sign -- here, the content between quotation marks).\n",
"- What do you think `print` does when it is given the name `my_workshop`? \n",
"````"
]
}
],
"metadata": {
Expand Down
15 changes: 15 additions & 0 deletions tests/test_postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,18 @@ def testProcessing(self):
cells_for_removal = [cell for cell in self.notebook.data if 'remove-cell' in cell['metadata'].get('tags', [])]
self.assertEqual(cells_for_removal, [], 'cells tagged for removal not removed')

def testMyST2oMD(self):
    '''
    Checks the conversion of MyST content to plain Markdown/HTML: an admonition cell, a dropdown hint cell, a hidden code cell, and the style cell added at the top of the notebook.
    '''
    self.notebook.myst_to_md()
    # Admonition with a label: expected to become a heading followed by the inner Markdown
    self.assertEqual(self.notebook.data[3]['source'],
                     ["#### Try it out!\n", "\n", "Use `split()` on the `term` variable (defined above) and compare the output with that of `course.split()`. \n", "\n", "Can you tell how `split()` works? How does it know where to separate the string?\n", "\n"],
                     'pure Markdown not created as expected')
    # Hint with the dropdown class: expected to become a <details> element with rendered HTML inside
    self.assertEqual(self.notebook.data[4]['source'],
                     ["<details>\n <summary>Click for a Hint</summary>\n <ul><li>Try changing the text between quotation marks to see how the output varies.</li><li>The name (left side of the equals sign) acts like a label for the value (right side of the equal sign -- here, the content between quotation marks).</li><li>What do you think <code>print</code> does when it is given the name <code>my_workshop</code>?</li></ul>\n</details>\n"],
                     'HTML for hints not created as expected'
                     )
    # Hidden code cell: expected to become a <details> element wrapping the code
    self.assertEqual(self.notebook.data[2]['source'],
                     "<details>\n<summary>Click for a Solution</summary>\n<pre><code>\nmsg = 'This cell should be hidden'\nprint(msg)\n</code></pre>\n</details>\n",
                     'HTML for code solutions not created as expected')
    self.notebook.add_md_style()
    # Use the TestCase assertion (not a bare assert) so the check also runs under python -O
    self.assertTrue(self.notebook.data[0]['source'][0].startswith(r'%%html'), 'expected HTML magic in first cell')

0 comments on commit 77c4a7c

Please sign in to comment.