Skip to content

Commit 0e40b6e

Browse files
jeremymanning and claude committed
Add people.html build system
- extract_people.py: One-time extraction from HTML to Excel
- build_people.py: Generate HTML from data/people.xlsx
- templates/people.html: Template with content markers
- data/people.xlsx: Extracted data (director, 12 members, 72 alumni, 10 collaborators)
- test_build_people.py: 20 passing tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 4c5e410 commit 0e40b6e

File tree

5 files changed

+1341
-0
lines changed

5 files changed

+1341
-0
lines changed

data/people.xlsx

12.8 KB
Binary file not shown.

scripts/build_people.py

Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,351 @@
1+
#!/usr/bin/env python3
2+
"""Build people.html from spreadsheet data.
3+
4+
Reads data from data/people.xlsx and generates people.html
5+
using the template in templates/people.html.
6+
"""
7+
import re
8+
from pathlib import Path
9+
from typing import List, Dict, Any
10+
import openpyxl
11+
12+
from utils import inject_content
13+
14+
15+
def load_people(xlsx_path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """Load all people data from Excel spreadsheet.

    Every worksheet becomes one section. The first row of a sheet supplies
    the column headers; each later row containing at least one non-empty
    cell becomes a dictionary keyed by those headers, with empty cells
    stored as ''.

    Args:
        xlsx_path: Path to the people.xlsx file

    Returns:
        Dictionary with keys for each section (director, members, etc.)
        Each value is a list of person dictionaries.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)

    # The workbook is opened read-only and must be closed explicitly; the
    # try/finally guarantees that happens even if reading a sheet raises.
    try:
        data = {}
        for sheet_name in wb.sheetnames:
            sheet = wb[sheet_name]

            # Get headers from first row
            headers = [cell.value for cell in sheet[1]]

            rows = []
            for row in sheet.iter_rows(min_row=2, values_only=True):
                # Skip empty rows
                if all(cell is None for cell in row):
                    continue

                # zip() stops at the shorter of headers/row, so cells
                # without a matching header are dropped.
                rows.append({
                    header: '' if value is None else value
                    for header, value in zip(headers, row)
                })

            data[sheet_name] = rows
    finally:
        wb.close()

    return data
52+
53+
54+
def generate_director_content(director: Dict[str, Any]) -> str:
    """Render the lab director block.

    Missing keys are treated as empty strings. Values are inserted into the
    markup as-is (no HTML escaping is applied).

    Args:
        director: Dictionary with director data (image, name, name_url,
            role, bio, links_html)

    Returns:
        HTML string for director section
    """
    person_name = director.get('name', '')
    photo = director.get('image', '')
    profile_url = director.get('name_url', '')
    title = director.get('role', '')
    extra_links = director.get('links_html', '')

    # Image file names are relative to images/people/
    photo_src = ''
    if photo:
        photo_src = f"images/people/{photo}"

    # Heading: name (linked when a URL is given), then " | role" if present
    linked_name = person_name
    if profile_url:
        linked_name = f'<a href="{profile_url}" target="_blank">{person_name}</a>'
    role_suffix = f' | {title}' if title else ''

    # Bio paragraph, followed by an optional second paragraph of links
    body = f"<p>{director.get('bio', '')}</p>"
    if extra_links:
        body += f'\n <p>{extra_links}</p>'

    return '\n'.join([
        ' <div class="two-column lab-director">',
        '<figure>',
        f'<img src="{photo_src}" alt="{person_name}">',
        '</figure>',
        '<div>',
        f'<h3>{linked_name}{role_suffix}</h3>',
        body,
        '</div>',
        '</div>',
    ])
96+
97+
98+
def generate_member_card(member: Dict[str, Any]) -> str:
    """Render one lab member as a ``person-card`` block.

    Missing keys are treated as empty strings. Values are inserted into the
    markup as-is (no HTML escaping is applied).

    Args:
        member: Dictionary with member data (image, name, name_url, role, bio)

    Returns:
        HTML string for the member card
    """
    def field(key: str) -> Any:
        # Absent columns behave like empty cells.
        return member.get(key, '')

    who = field('name')
    photo = field('image')
    link = field('name_url')
    title = field('role')

    # Image file names are relative to images/people/
    photo_src = f"images/people/{photo}" if photo else ""
    # Name is wrapped in a link only when a URL is given
    linked = f'<a href="{link}" target="_blank">{who}</a>' if link else who
    # Role, when present, follows the name after a " | " separator
    suffix = f' | {title}' if title else ''

    lines = [
        ' <div class="person-card">',
        f'<img src="{photo_src}" alt="{who}">',
        f'<h3>{linked}{suffix}</h3>',
        f"<p>{field('bio')}</p>",
        '</div>',
    ]
    return '\n'.join(lines)
132+
133+
134+
def generate_members_content(members: List[Dict[str, Any]], per_row: int = 3) -> str:
    """Generate HTML content for all active lab members.

    Member cards are grouped into ``people-grid`` blocks of ``per_row``
    cards each; the last grid holds whatever cards remain.

    Args:
        members: List of member dictionaries
        per_row: Number of cards placed in each grid (defaults to 3)

    Returns:
        HTML string with all member cards organized in grids, or '' when
        there are no members

    Raises:
        ValueError: If per_row is less than 1
    """
    if per_row < 1:
        raise ValueError(f'per_row must be at least 1, got {per_row}')
    if not members:
        return ''

    cards = [generate_member_card(m) for m in members]

    # Group cards into rows of per_row
    grids = []
    for start in range(0, len(cards), per_row):
        row_cards = '\n'.join(cards[start:start + per_row])
        grids.append(f' <div class="people-grid">\n{row_cards}\n </div>')

    # Grids are separated by a blank line
    return '\n\n'.join(grids)
160+
161+
162+
def generate_alumni_entry(alum: Dict[str, Any]) -> str:
    """Generate HTML for a single alumni entry.

    The entry is the (optionally linked) name followed by a parenthetical
    holding the years and the current position, separated by "; ". The
    parenthetical is omitted when both are empty.

    Args:
        alum: Dictionary with alumni data (name, name_url, years,
            current_position, current_position_url)

    Returns:
        HTML string for the alumni entry
    """
    name = alum.get('name', '')
    name_url = alum.get('name_url', '')
    years = alum.get('years', '')
    current_position = alum.get('current_position', '')
    current_position_url = alum.get('current_position_url', '')

    # Build name with optional link
    if name_url:
        name_display = f'<a href="{name_url}" target="_blank">{name}</a>'
    else:
        name_display = name

    # Cell values may be numbers rather than text (e.g. a bare year such as
    # 2020). str.join() and re.match() below only accept strings, so
    # normalise to text first; whole-number floats drop their ".0".
    if isinstance(years, float) and years.is_integer():
        years = int(years)
    years_text = str(years) if years else ''
    position_text = str(current_position) if current_position else ''

    # Build position display with optional link
    # Position format is typically "now at Company" or "then a CDL grad student!"
    # We need to link the company/position name
    if position_text and current_position_url:
        # Split off the leading "now"/"then" + "a"/"at" so only the
        # position name itself is linked
        match = re.match(r'(now|then)\s+(at?)\s+(.+)', position_text)
        if match:
            prefix = f'{match.group(1)} {match.group(2)} '
            position_name = match.group(3)
            position_display = f'{prefix}<a href="{current_position_url}" target="_blank">{position_name}</a>'
        else:
            # Unrecognised phrasing: link the whole position text
            position_display = f'<a href="{current_position_url}" target="_blank">{position_text}</a>'
    else:
        position_display = position_text

    # Build parenthetical info from whichever parts are present
    paren_parts = [part for part in (years_text, position_display) if part]
    paren_display = f' ({"; ".join(paren_parts)})' if paren_parts else ''

    return f'{name_display}{paren_display}'
208+
209+
210+
def generate_alumni_list_content(alumni: List[Dict[str, Any]]) -> str:
    """Render an alumni list (postdocs, grads, managers) as one HTML string.

    Args:
        alumni: List of alumni dictionaries

    Returns:
        The formatted entries joined by a <br> line break, or '' when the
        list is empty
    """
    line_break = '<br>\n '
    if alumni:
        return line_break.join(generate_alumni_entry(person) for person in alumni)
    return ''
224+
225+
226+
def generate_undergrad_entry(alum: Dict[str, Any]) -> str:
    """Format one undergraduate alumni entry as "name (years)".

    Args:
        alum: Dictionary with alumni data (name, years)

    Returns:
        The name followed by the years in parentheses, or the bare name
        when no years are given
    """
    person = alum.get('name', '')
    span = alum.get('years', '')
    return f'{person} ({span})' if span else person
241+
242+
243+
def generate_undergrad_list_content(alumni: List[Dict[str, Any]]) -> str:
    """Render the undergraduate alumni list as one HTML string.

    Args:
        alumni: List of alumni dictionaries

    Returns:
        The formatted entries joined by a <br> line break, or '' when the
        list is empty
    """
    line_break = '<br>\n '
    if alumni:
        return line_break.join(generate_undergrad_entry(person) for person in alumni)
    return ''
257+
258+
259+
def generate_collaborator_entry(collab: Dict[str, Any]) -> str:
    """Render one collaborator as an HTML paragraph.

    Without a URL the description is emitted unchanged. With a URL, the
    name is turned into a link: if the description begins with the name,
    the link replaces that leading name and the rest of the description
    follows; otherwise the paragraph contains only the linked name.

    Args:
        collab: Dictionary with collaborator data (name, url, description)

    Returns:
        HTML string for the collaborator paragraph
    """
    label = collab.get('name', '')
    href = collab.get('url', '')
    text = collab.get('description', '')

    # Nothing to link: the description is the whole paragraph
    if not href:
        return f'<p>{text}</p>'

    # Keep the tail of the description only when it starts with the name
    remainder = text[len(label):] if text.startswith(label) else ''
    anchor = f'<a href="{href}" target="_blank">{label}</a>'
    return f'<p>{anchor}{remainder}</p>'
284+
285+
286+
def generate_collaborators_content(collaborators: List[Dict[str, Any]]) -> str:
    """Render the collaborators section as one HTML string.

    Args:
        collaborators: List of collaborator dictionaries

    Returns:
        One paragraph per collaborator, newline-separated, or '' when the
        list is empty
    """
    separator = '\n '
    if collaborators:
        return separator.join(generate_collaborator_entry(person) for person in collaborators)
    return ''
300+
301+
302+
def build_people(
    data_path: Path,
    template_path: Path,
    output_path: Path
) -> None:
    """Build people.html from data and template.

    Loads every sheet of the spreadsheet, renders each section to HTML,
    passes the rendered sections to inject_content keyed by marker name,
    and prints a one-line summary.

    Args:
        data_path: Path to people.xlsx
        template_path: Path to template HTML file
        output_path: Path for generated HTML file
    """
    sections = load_people(data_path)

    # Only the first row of the director sheet is rendered
    director_rows = sections.get('director')
    director_html = generate_director_content(director_rows[0]) if director_rows else ''

    # (template marker, sheet name, renderer) for every list-style section
    list_sections = (
        ('MEMBERS_CONTENT', 'members', generate_members_content),
        ('ALUMNI_POSTDOCS_CONTENT', 'alumni_postdocs', generate_alumni_list_content),
        ('ALUMNI_GRADS_CONTENT', 'alumni_grads', generate_alumni_list_content),
        ('ALUMNI_MANAGERS_CONTENT', 'alumni_managers', generate_alumni_list_content),
        ('ALUMNI_UNDERGRADS_CONTENT', 'alumni_undergrads', generate_undergrad_list_content),
        ('COLLABORATORS_CONTENT', 'collaborators', generate_collaborators_content),
    )

    replacements = {'DIRECTOR_CONTENT': director_html}
    for marker, sheet_name, render in list_sections:
        # A missing sheet renders as an empty section
        replacements[marker] = render(sections.get(sheet_name, []))

    # Inject into template
    inject_content(template_path, output_path, replacements)

    # Report the number of rows across all sheets
    total = sum(map(len, sections.values()))
    print(f"Generated {output_path} with {total} people entries")
338+
339+
340+
def main():
    """Main entry point for CLI usage.

    Resolves the data, template and output paths relative to the directory
    two levels above this file, then runs the build.
    """
    root = Path(__file__).parent.parent
    build_people(
        root / 'data' / 'people.xlsx',
        root / 'templates' / 'people.html',
        root / 'people.html',
    )
348+
349+
350+
# Run the build only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)