-
Notifications
You must be signed in to change notification settings - Fork 884
/
Copy pathnotebook_version_standardizer.py
169 lines (139 loc) · 5.45 KB
/
notebook_version_standardizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import json
import os
import click
# Absolute path of the "source" directory located next to this script.
DOCS_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "source",
)
def _get_ipython_notebooks(docs_source):
    """Collect the paths of all ``.ipynb`` files below *docs_source*.

    A directory is ignored, together with everything beneath it, when its
    path relative to *docs_source* contains one of the skip names
    (``_templates``, ``generated``, ``.ipynb_checkpoints``).

    Args:
        docs_source: Root directory to search.

    Returns:
        A list of notebook paths, each joined onto *docs_source*, in a
        deterministic (sorted) traversal order.
    """
    directories_to_skip = ("_templates", "generated", ".ipynb_checkpoints")
    notebooks = []
    for root, dirnames, filenames in os.walk(docs_source):
        # Only look at the part of the path below docs_source. Matching the
        # full path would drop every notebook whenever the checkout itself
        # lives under a directory such as ``/builds/generated/...``.
        relative_root = os.path.relpath(root, docs_source)
        if any(dir_ in relative_root for dir_ in directories_to_skip):
            # Every descendant path contains the same match, so stop here.
            dirnames[:] = []
            continue
        # os.walk order depends on the filesystem; sort for stable output.
        dirnames.sort()
        notebooks.extend(
            os.path.join(root, filename)
            for filename in sorted(filenames)
            if filename.endswith(".ipynb")
        )
    return notebooks
def _check_delete_empty_cell(notebook, delete=True):
    """Check for, and optionally remove, an empty trailing code cell.

    Args:
        notebook: Path of the ``.ipynb`` file to inspect.
        delete: When true, drop an empty last code cell and write the
            notebook back; when false, only report what was found.

    Returns:
        With ``delete=False``: ``False`` if the last cell is an empty code
        cell, ``True`` otherwise (including a notebook with no cells).
        With ``delete=True``: always ``None``.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # encoding; the file is written back with ensure_ascii=False below.
    with open(notebook, "r", encoding="utf-8") as f:
        source = json.load(f)

    cells = source["cells"]
    # Guard the [-1] lookup: a notebook without cells has nothing to trim.
    # An empty source may be stored as [] or as "", so test for emptiness.
    has_empty_last_cell = bool(cells) and (
        cells[-1]["cell_type"] == "code" and not cells[-1]["source"]
    )

    if not delete:
        return not has_empty_last_cell

    if has_empty_last_cell:
        source["cells"] = cells[:-1]
    # The file is rewritten even when nothing was removed, which also
    # normalises its JSON formatting.
    with open(notebook, "w", encoding="utf-8") as f:
        json.dump(source, f, ensure_ascii=False, indent=1)
    return None
def _check_execution_and_output(notebook):
    """Report whether every code cell in *notebook* is free of run state.

    Args:
        notebook: Path of the ``.ipynb`` file to inspect.

    Returns:
        ``False`` if any code cell has a non-null ``execution_count`` or a
        non-empty ``outputs`` list, ``True`` otherwise. A code cell that
        lacks either key is treated as clean for that key.
    """
    # Read as UTF-8 explicitly rather than with the locale's default encoding.
    with open(notebook, "r", encoding="utf-8") as f:
        source = json.load(f)
    return not any(
        cell["cell_type"] == "code"
        and (
            cell.get("execution_count") is not None
            or cell.get("outputs", []) != []
        )
        for cell in source["cells"]
    )
def _check_python_version(notebook, default_version):
    """Tell whether *notebook* records *default_version* as its Python version.

    Args:
        notebook: Path of the ``.ipynb`` file to inspect.
        default_version: Version string expected in
            ``metadata.language_info.version``.

    Returns:
        ``True`` when the recorded version equals *default_version*;
        ``False`` when it differs or is not recorded at all.
    """
    # Read as UTF-8 explicitly rather than with the locale's default encoding.
    with open(notebook, "r", encoding="utf-8") as f:
        source = json.load(f)
    # A notebook without language_info counts as a mismatch instead of
    # aborting the whole scan with a KeyError.
    language_info = source.get("metadata", {}).get("language_info", {})
    return language_info.get("version") == default_version
def _fix_python_version(notebook, default_version):
    """Set the Python version recorded in *notebook* to *default_version*.

    The notebook is read, ``metadata.language_info.version`` is overwritten,
    and the file is written back in place.

    Args:
        notebook: Path of the ``.ipynb`` file to rewrite.
        default_version: Version string to store.

    Returns:
        None.
    """
    # Use UTF-8 for both read and write: the dump below emits non-ASCII
    # characters verbatim (ensure_ascii=False).
    with open(notebook, "r", encoding="utf-8") as f:
        source = json.load(f)
    # Create the containers when absent instead of failing with a KeyError.
    language_info = source.setdefault("metadata", {}).setdefault("language_info", {})
    # NOTE(review): nbformat's v4 schema appears to require "name" inside
    # language_info; fill it only when missing so a newly created entry is
    # not left incomplete - confirm against the schema.
    language_info.setdefault("name", "python")
    language_info["version"] = default_version
    with open(notebook, "w", encoding="utf-8") as f:
        json.dump(source, f, ensure_ascii=False, indent=1)
def _fix_execution_and_output(notebook):
    """Strip execution state from *notebook* and normalise its kernelspec.

    Every code cell gets ``execution_count`` set to ``None`` and ``outputs``
    set to ``[]``; ``metadata.kernelspec`` gets ``display_name`` "Python 3"
    and ``name`` "python3". The file is rewritten in place.

    Args:
        notebook: Path of the ``.ipynb`` file to rewrite.

    Returns:
        None.
    """
    # Use UTF-8 for both read and write: the dump below emits non-ASCII
    # characters verbatim (ensure_ascii=False).
    with open(notebook, "r", encoding="utf-8") as f:
        source = json.load(f)
    for cell in source["cells"]:
        if cell["cell_type"] != "code":
            continue
        # Reset both fields unconditionally. A cell can hold outputs while
        # its execution_count is already null; _check_execution_and_output
        # flags such a cell, so it has to be cleaned here as well.
        cell["execution_count"] = None
        cell["outputs"] = []
    # Create the kernelspec entry when absent instead of raising KeyError.
    kernelspec = source.setdefault("metadata", {}).setdefault("kernelspec", {})
    kernelspec["display_name"] = "Python 3"
    kernelspec["name"] = "python3"
    with open(notebook, "w", encoding="utf-8") as f:
        json.dump(source, f, ensure_ascii=False, indent=1)
def _get_notebooks_with_executions_and_empty(notebooks, default_version="3.9.2"):
    """Sort *notebooks* into the three kinds of problems this script handles.

    Each notebook goes through the execution/output check, the empty
    last-cell check (without deleting anything) and the Python-version
    check, in that order. A notebook may end up in more than one list.

    Args:
        notebooks: Iterable of notebook paths.
        default_version: Version string passed to the version check.

    Returns:
        A 3-tuple of lists: notebooks failing the execution/output check,
        notebooks failing the empty last-cell check, and notebooks failing
        the version check.
    """
    with_outputs = []
    with_empty_tail = []
    with_wrong_version = []
    for path in notebooks:
        outputs_ok = _check_execution_and_output(path)
        if not outputs_ok:
            with_outputs.append(path)
        tail_ok = _check_delete_empty_cell(path, delete=False)
        if not tail_ok:
            with_empty_tail.append(path)
        version_ok = _check_python_version(path, default_version)
        if not version_ok:
            with_wrong_version.append(path)
    return (with_outputs, with_empty_tail, with_wrong_version)
def _fix_versions(notebooks, default_version="3.9.2"):
    """Apply ``_fix_python_version`` to each path in *notebooks*.

    Args:
        notebooks: Iterable of notebook paths to rewrite.
        default_version: Version string handed to ``_fix_python_version``.
    """
    for path in notebooks:
        _fix_python_version(path, default_version)
def _remove_notebook_empty_last_cell(notebooks):
    """Call ``_check_delete_empty_cell`` in delete mode on each notebook.

    Args:
        notebooks: Iterable of notebook paths to process.
    """
    for path in notebooks:
        _check_delete_empty_cell(path, delete=True)
def _standardize_outputs(notebooks):
    """Apply ``_fix_execution_and_output`` to each path in *notebooks*.

    Args:
        notebooks: Iterable of notebook paths to rewrite.
    """
    for path in notebooks:
        _fix_execution_and_output(path)
# Root click command group; the commands decorated with ``@cli.command()``
# further down in this file are registered on it.
# NOTE(review): click appears to surface this docstring as the group's help
# text, so treat its wording as user-facing output - confirm before editing.
@click.group()
def cli():
    """no-op"""
@cli.command()
def standardize():
    # Fix, in place, every notebook under DOCS_PATH that fails one of the
    # checks, echoing the affected paths after each kind of fix.
    # (Kept as comments: a docstring here would become click help text.)
    found = _get_notebooks_with_executions_and_empty(
        _get_ipython_notebooks(DOCS_PATH),
    )
    with_outputs, with_empty_tail, with_wrong_version = found

    if with_outputs:
        _standardize_outputs(with_outputs)
        listing = "\n".join("\t" + path for path in with_outputs)
        click.echo(f"Removed the outputs for:\n {listing}")

    if with_empty_tail:
        _remove_notebook_empty_last_cell(with_empty_tail)
        listing = "\n".join("\t" + path for path in with_empty_tail)
        click.echo(f"Removed the empty cells for:\n {listing}")

    if with_wrong_version:
        _fix_versions(with_wrong_version)
        listing = "\n".join("\t" + path for path in with_wrong_version)
        click.echo(f"Fixed python versions for:\n {listing}")
@cli.command()
def check_execution():
    # Scan the notebooks under DOCS_PATH and exit with an error message for
    # the first category of problem found: executed outputs, then empty
    # trailing cells, then wrong Python version. Nothing is modified.
    # (Kept as comments: a docstring here would become click help text.)
    found = _get_notebooks_with_executions_and_empty(
        _get_ipython_notebooks(DOCS_PATH),
    )
    with_outputs, with_empty_tail, with_wrong_version = found

    if with_outputs:
        listing = "\n".join("\t" + path for path in with_outputs)
        raise SystemExit(
            f"The following notebooks have executed outputs:\n {listing}\n"
            "Please run make lint-fix to fix this."
        )

    if with_empty_tail:
        listing = "\n".join("\t" + path for path in with_empty_tail)
        raise SystemExit(
            f"The following notebooks have empty cells at the end:\n {listing}\n"
            "Please run make lint-fix to fix this."
        )

    if with_wrong_version:
        listing = "\n".join("\t" + path for path in with_wrong_version)
        raise SystemExit(
            f"The following notebooks have the wrong Python version: \n {listing}\n"
            "Please run make lint-fix to fix this."
        )
if __name__ == "__main__":
    # Run the click command group when the file is executed as a script.
    cli()