Skip to content

Commit 840e2ed

Browse files
jeremymanning and claude committed
Add lab-manual submodule and LaTeX parser (Phases 1-2)
- Added ContextLab/lab-manual as Git submodule at lab-manual/
- Created scripts/parse_lab_manual.py with parser for lab_manual.tex members chapter, plus helpers for adding/moving members and committing/pushing submodule changes
- Created tests/test_parse_lab_manual.py with 17 tests (all passing)
- Updated build-content.yml to checkout with submodules
- Added full speckit artifacts (spec, plan, research, data-model, tasks)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c83794c commit 840e2ed

12 files changed

Lines changed: 1469 additions & 0 deletions

File tree

.github/workflows/build-content.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ jobs:
2626
steps:
2727
- name: Checkout repository
2828
uses: actions/checkout@v4
29+
with:
30+
submodules: true
2931

3032
- name: Set up Python
3133
uses: actions/setup-python@v5

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "lab-manual"]
2+
path = lab-manual
3+
url = https://github.com/ContextLab/lab-manual.git

lab-manual

Submodule lab-manual added at c31674c

scripts/parse_lab_manual.py

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
"""Parse lab_manual.tex to extract member and alumni data.
2+
3+
Parses the 'Lab members and alumni' chapter from the ContextLab lab-manual
4+
repository's lab_manual.tex file. Extracts names, roles, year ranges, and
5+
active/alumni status.
6+
"""
7+
import re
8+
import subprocess
9+
from pathlib import Path
10+
11+
12+
def parse_members_chapter(tex_path):
    """Extract all member/alumni entries from lab_manual.tex.

    The 'Lab members and alumni' chapter is located, the text inside its
    first ``fullwidth`` environment is split on ``\\subsection{...}``
    headings, and each subsection body is handed to ``_parse_section``.

    Args:
        tex_path: Path to lab_manual.tex file.

    Returns:
        List of dicts with keys: name, role_category, start_year,
        end_year (None if active), is_active, raw_line.

    Raises:
        ValueError: If the members chapter cannot be found in the file.
    """
    tex_path = Path(tex_path)
    content = tex_path.read_text(encoding='utf-8')

    # Extract the members chapter (body of its fullwidth environment)
    chapter_match = re.search(
        r'\\chapter\{Lab members and alumni\}.*?\\begin\{fullwidth\}(.*?)\\end\{fullwidth\}',
        content, re.DOTALL
    )
    if not chapter_match:
        raise ValueError(f"Could not find 'Lab members and alumni' chapter in {tex_path}")

    # re.split with one capture group yields
    # [before_first, title1, body1, title2, body2, ...]
    parts = re.split(r'\\subsection\{(.*?)\}', chapter_match.group(1))

    records = []
    # Walk the (title, body) pairs directly instead of going through a dict
    # keyed by title: a dict would silently discard the body of an earlier
    # subsection whenever two subsections share the same title.
    for title, body in zip(parts[1::2], parts[2::2]):
        # A subsection counts as "current" when its title mentions it.
        is_active = 'current' in title.strip().lower()
        _parse_section(body, is_active, records)

    return records
53+
54+
55+
def _parse_section(section_body, is_active, records):
    """Walk the role groups of one subsection and collect their entries.

    The subsection text is split on ``\\newthought{Role}`` headings. Each
    role's text is passed to the PI parser (for the role named exactly
    'PI') or to the list-item parser (for everything else); both append
    their results to ``records`` in place.

    Args:
        section_body: Text of one subsection (Current or Alumni).
        is_active: Whether the subsection holds current members.
        records: List that parsed entry dicts are appended to.
    """
    pieces = re.split(r'\\newthought\{(.*?)\}', section_body)

    # pieces[0] is the text before the first heading; after that headings
    # and their bodies alternate, and re.split always ends on a body.
    for heading, role_content in zip(pieces[1::2], pieces[2::2]):
        role_category = heading.strip()

        # A role whose every non-blank line is a LaTeX comment is treated
        # as commented out and skipped.
        has_live_line = any(
            text.strip() and not text.strip().startswith('%')
            for text in role_content.split('\n')
        )
        if not has_live_line:
            continue

        # The PI is written without a list wrapper, so it needs its own parser.
        if role_category == 'PI':
            _parse_pi_entry(role_content, role_category, is_active, records)
        else:
            _parse_list_entries(role_content, role_category, is_active, records)
81+
82+
83+
def _parse_pi_entry(content, role_category, is_active, records):
84+
"""Parse PI entry which has no list wrapper."""
85+
# Format: \enskip Name (YYYY -- ) or just Name (YYYY -- )
86+
pattern = r'(?:\\enskip\s+)?([A-Z][\w\s.]+?)\s*\((\d{4})\s*--\s*(\d{4})?\s*\)?'
87+
for match in re.finditer(pattern, content):
88+
name = match.group(1).strip()
89+
start_year = int(match.group(2))
90+
end_year = int(match.group(3)) if match.group(3) else None
91+
records.append({
92+
'name': name,
93+
'role_category': role_category,
94+
'start_year': start_year,
95+
'end_year': end_year,
96+
'is_active': is_active and end_year is None,
97+
'raw_line': match.group(0).strip(),
98+
})
99+
100+
101+
def _parse_list_entries(content, role_category, is_active, records):
102+
"""Parse \\item entries from list blocks."""
103+
# Match \item Name (YYYY -- YYYY) or \item Name (YYYY) or \item Name (YYYY --)
104+
item_pattern = r'\\item\s+(.+?)\s*\((\d{4})(?:\s*--\s*(\d{4})?)?\s*\)'
105+
for line in content.split('\n'):
106+
stripped = line.strip()
107+
if stripped.startswith('%'):
108+
continue
109+
match = re.search(item_pattern, stripped)
110+
if match:
111+
name = match.group(1).strip()
112+
start_year = int(match.group(2))
113+
end_str = match.group(3)
114+
end_year = int(end_str) if end_str else None
115+
records.append({
116+
'name': name,
117+
'role_category': role_category,
118+
'start_year': start_year,
119+
'end_year': end_year,
120+
'is_active': is_active and end_year is None,
121+
'raw_line': stripped,
122+
})
123+
124+
125+
def add_member_to_lab_manual(tex_path, name, role, start_year):
    r"""Add a new member to the Current lab members section.

    The member is appended as ``\item Name (YYYY -- )`` at the end of the
    list that belongs to the given role under 'Current lab members', and
    the file is rewritten in place.

    Args:
        tex_path: Path to lab_manual.tex.
        name: Full name of the member.
        role: Role category (e.g., 'Graduate Students', 'Undergraduate RAs').
            A few common short names ('postdoc', 'grad student', ...) are
            mapped to the headings used in the manual; anything else is
            used verbatim as the heading text.
        start_year: Start year as int.

    Raises:
        ValueError: If the role heading, or a list belonging to it, cannot
            be found inside the Current lab members section.
    """
    tex_path = Path(tex_path)
    content = tex_path.read_text(encoding='utf-8')

    # Map common role names to lab-manual role headings
    role_map = {
        'postdoc': 'Postdoctoral Researchers',
        'grad student': 'Graduate Students',
        'graduate student': 'Graduate Students',
        'undergrad': 'Undergraduate RAs',
        'undergraduate': 'Undergraduate RAs',
        'lab manager': 'Lab Managers',
        'research assistant': 'Research Assistants',
    }
    role_heading = role_map.get(role.lower(), role)

    new_item = f'\\item {name} ({start_year} -- )'

    # Locate the role's list strictly inside the Current section. One
    # unbounded DOTALL pattern would happily skip ahead to a later role's
    # list, or into the alumni section, when this role has no list here,
    # and the member would be filed in the wrong place.
    list_match = None
    section_head = re.search(r'\\subsection\{Current lab members\}', content)
    if section_head:
        next_section = re.compile(r'\\subsection\{').search(content, section_head.end())
        section_end = next_section.start() if next_section else len(content)

        heading_re = re.compile(r'\\newthought\{' + re.escape(role_heading) + r'\}')
        any_heading_re = re.compile(r'\\newthought\{')
        list_re = re.compile(
            r'\\begin\{list\}\{\\quad\}\{\}(.*?)\\end\{list\}', re.DOTALL
        )
        for heading in heading_re.finditer(content, section_head.end(), section_end):
            # A role's block runs up to the next role heading (or section end).
            next_heading = any_heading_re.search(content, heading.end(), section_end)
            block_end = next_heading.start() if next_heading else section_end
            list_match = list_re.search(content, heading.end(), block_end)
            if list_match:
                break

    if not list_match:
        raise ValueError(
            f"Could not find '{role_heading}' section under "
            f"'Current lab members' in {tex_path}"
        )

    # Insert the new item on its own line directly before \end{list};
    # trailing whitespace of the existing list body is normalised away.
    new_content = (
        content[:list_match.start(1)]
        + list_match.group(1).rstrip()
        + '\n' + new_item + '\n'
        + content[list_match.end(1):]
    )
    tex_path.write_text(new_content, encoding='utf-8')
171+
172+
173+
def move_member_to_alumni(tex_path, name, end_year):
    r"""Move a member from the Current section to the Alumni section.

    The member's open-ended ``\item Name (YYYY -- )`` line is removed from
    'Current lab members' and re-added as ``\item Name (YYYY -- end_year)``
    at the end of the list for the same role under 'Lab alumni'. The file
    is only rewritten once both steps have succeeded.

    Args:
        tex_path: Path to lab_manual.tex.
        name: Full name of the member.
        end_year: End year as int.

    Raises:
        ValueError: If the Current section, the member's entry, the
            member's role heading, or the matching alumni list cannot be
            found.
    """
    tex_path = Path(tex_path)
    content = tex_path.read_text(encoding='utf-8')

    # Only look for the member inside the Current lab members section
    current_section_match = re.search(
        r'\\subsection\{Current lab members\}(.*?)\\subsection\{Lab alumni\}',
        content, re.DOTALL
    )
    if not current_section_match:
        raise ValueError("Could not find Current lab members section")

    current_start = current_section_match.start(1)
    current_text = current_section_match.group(1)

    # Leading indentation is limited to spaces/tabs: with \s the match
    # could begin on preceding blank lines, which would then be deleted
    # together with the item.
    item_pattern = re.compile(
        r'^[ \t]*\\item\s+' + re.escape(name) + r'\s*\((\d{4})\s*--\s*\)',
        re.MULTILINE
    )
    item_match = item_pattern.search(current_text)
    if not item_match:
        raise ValueError(f"Could not find '{name}' in Current lab members section")

    start_year = item_match.group(1)

    # The role is given by the last \newthought heading before the item
    role_matches = list(
        re.finditer(r'\\newthought\{(.*?)\}', current_text[:item_match.start()])
    )
    if not role_matches:
        raise ValueError(f"Could not determine role for '{name}'")
    role_category = role_matches[-1].group(1)

    # Remove the item's line, including its trailing newline
    line_start = current_start + item_match.start()
    line_end = content.find('\n', current_start + item_match.end())
    line_end = len(content) if line_end == -1 else line_end + 1
    content = content[:line_start] + content[line_end:]

    # Add to alumni section with closed year range
    alumni_item = f'\\item {name} ({start_year} -- {end_year})'

    # Find the list of this role under Lab alumni, staying inside the
    # role's own block so the entry cannot land in a different role's list
    # when this role has a heading but no list.
    list_match = None
    alumni_head = re.search(r'\\subsection\{Lab alumni\}', content)
    if alumni_head:
        next_section = re.compile(r'\\subsection\{').search(content, alumni_head.end())
        section_end = next_section.start() if next_section else len(content)

        heading_re = re.compile(r'\\newthought\{' + re.escape(role_category) + r'\}')
        any_heading_re = re.compile(r'\\newthought\{')
        list_re = re.compile(
            r'\\begin\{list\}\{\\quad\}\{\}(.*?)\\end\{list\}', re.DOTALL
        )
        for heading in heading_re.finditer(content, alumni_head.end(), section_end):
            next_heading = any_heading_re.search(content, heading.end(), section_end)
            block_end = next_heading.start() if next_heading else section_end
            list_match = list_re.search(content, heading.end(), block_end)
            if list_match:
                break

    if not list_match:
        raise ValueError(
            f"Could not find '{role_category}' alumni section in {tex_path}"
        )

    # Append the closed entry on its own line directly before \end{list}
    content = (
        content[:list_match.start(1)]
        + list_match.group(1).rstrip()
        + '\n' + alumni_item + '\n'
        + content[list_match.end(1):]
    )

    tex_path.write_text(content, encoding='utf-8')
250+
251+
252+
def commit_and_push_lab_manual(submodule_path, message, branch='master'):
    """Commit and push changes in the lab-manual submodule.

    Stages lab_manual.tex, and if that leaves anything staged, commits it
    and pushes the resulting commit to ``origin``. Returns without
    committing when nothing is staged.

    Args:
        submodule_path: Path to the lab-manual submodule directory.
        message: Commit message.
        branch: Name of the remote branch to push to (default 'master').

    Raises:
        RuntimeError: If the submodule looks uninitialized or any git
            operation fails.
    """
    submodule_path = Path(submodule_path)
    # NOTE(review): this only rejects a directory that has neither .git nor
    # lab_manual.tex. If .git is missing but the .tex file exists, git will
    # resolve to whatever repository encloses this directory -- confirm
    # whether 'or' was intended here.
    if not (submodule_path / '.git').exists() and not (submodule_path / 'lab_manual.tex').exists():
        raise RuntimeError(
            f"Lab-manual submodule not initialized at {submodule_path}. "
            f"Run: git submodule update --init"
        )

    def _git(*args):
        """Run one git command in the submodule, raising on failure."""
        return subprocess.run(
            ['git', *args],
            cwd=submodule_path, check=True, capture_output=True, text=True
        )

    try:
        _git('add', 'lab_manual.tex')

        # Check if there are staged changes: exit status 0 means none,
        # 1 means there are differences.
        staged = subprocess.run(
            ['git', 'diff', '--cached', '--quiet'],
            cwd=submodule_path, capture_output=True, text=True
        )
        if staged.returncode == 0:
            return  # Nothing to commit
        if staged.returncode != 1:
            # Any other status is a git error, not "changes present".
            raise subprocess.CalledProcessError(
                staged.returncode, staged.args, staged.stdout, staged.stderr
            )

        _git('commit', '-m', message)
        # Push the commit that was just created (HEAD) rather than the local
        # branch ref: a submodule checkout is normally a detached HEAD, so a
        # local branch of that name may not exist or may not contain the
        # new commit.
        _git('push', 'origin', f'HEAD:refs/heads/{branch}')
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to commit/push lab-manual changes: {e.stderr or e.stdout}"
        ) from e
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Specification Quality Checklist: People & Lab-Manual Synchronization
2+
3+
**Purpose**: Validate specification completeness and quality before proceeding to planning
4+
**Created**: 2026-03-23
5+
**Feature**: [spec.md](../spec.md)
6+
7+
## Content Quality
8+
9+
- [x] No implementation details (languages, frameworks, APIs)
10+
- [x] Focused on user value and business needs
11+
- [x] Written for non-technical stakeholders
12+
- [x] All mandatory sections completed
13+
14+
## Requirement Completeness
15+
16+
- [x] No [NEEDS CLARIFICATION] markers remain
17+
- [x] Requirements are testable and unambiguous
18+
- [x] Success criteria are measurable
19+
- [x] Success criteria are technology-agnostic (no implementation details)
20+
- [x] All acceptance scenarios are defined
21+
- [x] Edge cases are identified
22+
- [x] Scope is clearly bounded
23+
- [x] Dependencies and assumptions identified
24+
25+
## Feature Readiness
26+
27+
- [x] All functional requirements have clear acceptance criteria
28+
- [x] User scenarios cover primary flows
29+
- [x] Feature meets measurable outcomes defined in Success Criteria
30+
- [x] No implementation details leak into specification
31+
32+
## Notes
33+
34+
- All items pass. Spec is ready for `/speckit.clarify` or `/speckit.plan`.
35+
- The spec references specific script names (onboard_member.py, offboard_member.py)
36+
and file paths (people.xlsx, JRM_CV.tex) because these are domain entities in this
37+
project, not implementation choices — they are the existing system being extended.

0 commit comments

Comments
 (0)