pyproject.toml (1 addition, 1 deletion)
@@ -23,7 +23,7 @@ dependencies = [
"mkdocs-same-dir==0.1.3",
"pre-commit==4.1",
"pymdown-extensions==10.14.3",
"ruff==0.9.9",
"ruff==0.13.3",
]

[tool.ruff]
requirements.txt (1 addition, 1 deletion)
@@ -9,7 +9,7 @@ mkdocs-exclude == 1.0.2
markdown-exec[ansi] == 1.10.3

# Developer stuff
ruff == 0.11.6
ruff == 0.13.3
codespell == 2.4.1
pre-commit == 4.2.0

s10_extra/documentation.md (1 addition, 1 deletion)
@@ -40,7 +40,7 @@ dynamic (like video), but the site does not change (1).

We are in this module going to look at [Mkdocs](https://www.mkdocs.org/), which (in my opinion) is one of the easiest
systems to get started with because all documentation is written in markdown and the build system is written in Python.
As an alternativ, you can consider doing the exercises in [Sphinx](https://www.sphinx-doc.org/en/master/) which is
As an alternative, you can consider doing the exercises in [Sphinx](https://www.sphinx-doc.org/en/master/) which is
probably the most used documentation system for Python code. Sphinx offers more customization than Mkdocs, so it is
generally preferred for larger projects with complex documentation, but for smaller projects Mkdocs should be easier to
get started with and is sufficient.
s3_reproducibility/config_files.md (1 addition, 1 deletion)
@@ -121,7 +121,7 @@ look online for your answers before looking at the solution. Remember: it's not
??? success "Solution"

From the top of the file `batch_size`, `x_dim`, `hidden_dim` can be found as hyperparameters. Looking through
the code it can be seen that the `latent_dim` of the encoder and decoder, `lr` for the optimzer, and `epochs` in
the code it can be seen that the `latent_dim` of the encoder and decoder, `lr` for the optimizer, and `epochs` in
the training loop are also hyperparameters. Finally, the `seed` is not included in the script but is needed to
make the script fully reproducible, e.g. `torch.manual_seed(seed)`.
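For readers skimming this hunk out of context, here is a minimal sketch of how the hyperparameters named in the solution could be gathered in one place and the seed applied. The dataclass layout and the default values are illustrative assumptions, not the module's actual code:

```python
from dataclasses import dataclass

import torch


@dataclass
class Config:
    # names taken from the solution text; values are made up for illustration
    batch_size: int = 100
    x_dim: int = 784
    hidden_dim: int = 400
    latent_dim: int = 20
    lr: float = 1e-3
    epochs: int = 30
    seed: int = 42


cfg = Config()
torch.manual_seed(cfg.seed)  # the seed the solution notes is needed for full reproducibility
```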

s7_deployment/exercise_files/onnx_simple.py (1 addition, 1 deletion)
@@ -27,7 +27,7 @@
graph = make_graph([node1, node2], "lr", [X, A, B], [Y]) # nodes # a name # inputs # outputs

# onnx graph
# there is no metata in this case.
# there is no metadata in this case.

onnx_model = make_model(graph)
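The hunk only shows a few lines of onnx_simple.py. For context, a minimal sketch of the pattern the script appears to follow (the classic linear-regression graph built with onnx.helper); the dynamic shapes below are assumptions, not taken from the file:

```python
from onnx import TensorProto
from onnx.checker import check_model
from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info

# inputs and output of the graph (names X, A, B, Y come from the hunk; shapes are illustrative)
X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None])
B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None])
Y = make_tensor_value_info("Y", TensorProto.FLOAT, [None])

# two nodes computing Y = X @ A + B
node1 = make_node("MatMul", ["X", "A"], ["XA"])
node2 = make_node("Add", ["XA", "B"], ["Y"])

graph = make_graph([node1, node2], "lr", [X, A, B], [Y])
onnx_model = make_model(graph)
check_model(onnx_model)  # sanity-check the assembled model
```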

tools/to_pdf/main.py (27 additions, 18 deletions)
@@ -1,15 +1,16 @@
import os
import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urljoin, urlparse

import pdfkit
import requests
from bs4 import BeautifulSoup
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
import re


def natural_key(s):
"""Turn a string into a list of ints and text, so 's10' > 's9'."""
return [int(text) if text.isdigit() else text.lower()
for text in re.split(r'(\d+)', s)]
return [int(text) if text.isdigit() else text.lower() for text in re.split(r"(\d+)", s)]
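A quick illustration of why natural_key is used instead of a plain string sort (the page names below are made up):

```python
# a plain string sort would put "s10..." before "s2..."; natural_key compares the numbers as ints
pages = ["s10_extra/index.html", "s2_organisation/index.html", "s1_development/index.html"]
print(sorted(pages, key=natural_key))
# ['s1_development/index.html', 's2_organisation/index.html', 's10_extra/index.html']
```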


def get_all_links(base_url):
@@ -47,7 +48,7 @@ def save_pages_as_pdfs(urls, output_dir="pages_pdfs"):
    os.makedirs(output_dir, exist_ok=True)
    pdf_files = []
    for i, url in enumerate(urls):
        out_file = os.path.join(output_dir, f"page_{i+1}.pdf")
        out_file = os.path.join(output_dir, f"page_{i + 1}.pdf")
        try:
            pdfkit.from_url(url, out_file)
            pdf_files.append(out_file)
@@ -68,17 +69,24 @@ def merge_pdfs(pdf_files, output_file="combined.pdf"):

import subprocess


def compress_pdf(input_file, output_file, quality="/ebook"):
    subprocess.run([
        "gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS={quality}",
        "-dNOPAUSE", "-dQUIET", "-dBATCH",
        f"-sOutputFile={output_file}", input_file
    ])
    subprocess.run(
        [
            "gs",
            "-sDEVICE=pdfwrite",
            "-dCompatibilityLevel=1.4",
            f"-dPDFSETTINGS={quality}",
            "-dNOPAUSE",
            "-dQUIET",
            "-dBATCH",
            f"-sOutputFile={output_file}",
            input_file,
        ]
    )
    print(f"📉 Compressed {input_file} → {output_file}")



def split_pdf(input_file, output_files):
    reader = PdfReader(input_file)
    total_pages = len(reader.pages)
@@ -95,21 +103,22 @@ def split_pdf(input_file, output_files):
            writer.add_page(page)
        with open(out_file, "wb") as f:
            writer.write(f)
        print(f"✂️ Split pages {start+1}-{end} → {out_file}")
        print(f"✂️ Split pages {start + 1}-{end} → {out_file}")
        start = end


if __name__ == "__main__":
    base_url = "https://skaftenicki.github.io/dtu_mlops/"
    urls = get_all_links(base_url)
    urls = sorted(urls, key=natural_key) # 👈 use natural sort
    urls = sorted(urls, key=natural_key)  # 👈 use natural sort
    print(f"Found {len(urls)} pages.")

    pdf_files = save_pages_as_pdfs(urls)
    merge_pdfs(pdf_files, "dtu_mlops_all.pdf")
    split_pdf("dtu_mlops_all.pdf", [
        "dtu_mlops_part1.pdf", "dtu_mlops_part2.pdf", "dtu_mlops_part3.pdf", "dtu_mlops_part4.pdf"
    ])
    split_pdf(
        "dtu_mlops_all.pdf",
        ["dtu_mlops_part1.pdf", "dtu_mlops_part2.pdf", "dtu_mlops_part3.pdf", "dtu_mlops_part4.pdf"],
    )
    compress_pdf("dtu_mlops_part1.pdf", "dtu_mlops_part1_small.pdf")
    compress_pdf("dtu_mlops_part2.pdf", "dtu_mlops_part2_small.pdf")
    compress_pdf("dtu_mlops_part3.pdf", "dtu_mlops_part3_small.pdf")