|
| 1 | +"""Parse lab_manual.tex to extract member and alumni data. |
| 2 | +
|
| 3 | +Parses the 'Lab members and alumni' chapter from the ContextLab lab-manual |
| 4 | +repository's lab_manual.tex file. Extracts names, roles, year ranges, and |
| 5 | +active/alumni status. |
| 6 | +""" |
| 7 | +import re |
| 8 | +import subprocess |
| 9 | +from pathlib import Path |
| 10 | + |
| 11 | + |
def parse_members_chapter(tex_path):
    """Extract all member/alumni entries from lab_manual.tex.

    The chapter body is the content of the first ``fullwidth`` environment
    that follows ``\\chapter{Lab members and alumni}``. That body is split on
    ``\\subsection{...}`` headings and every subsection is handed to
    ``_parse_section``. A subsection is treated as "current" when its title
    contains the word "current" (case-insensitive); everything else is
    treated as alumni.

    Args:
        tex_path: Path to lab_manual.tex file.

    Returns:
        List of dicts with keys: name, role_category, start_year,
        end_year (None when no end year is written), is_active, raw_line.
        The list is empty when the chapter contains no subsections.

    Raises:
        ValueError: If the 'Lab members and alumni' chapter (with its
            fullwidth environment) cannot be found.
    """
    tex_path = Path(tex_path)
    content = tex_path.read_text(encoding='utf-8')

    # Extract the members chapter
    chapter_match = re.search(
        r'\\chapter\{Lab members and alumni\}.*?\\begin\{fullwidth\}(.*?)\\end\{fullwidth\}',
        content, re.DOTALL
    )
    if not chapter_match:
        raise ValueError(f"Could not find 'Lab members and alumni' chapter in {tex_path}")

    chapter_text = chapter_match.group(1)

    # re.split with one capture group yields
    # [before_first, title1, content1, title2, content2, ...]
    pieces = re.split(r'\\subsection\{(.*?)\}', chapter_text)

    records = []
    # Walk the (title, body) pairs directly instead of going through a dict:
    # a dict keyed by title would silently drop all but the last subsection
    # when two subsections share a title.
    for title, body in zip(pieces[1::2], pieces[2::2]):
        is_active = 'current' in title.strip().lower()
        _parse_section(body, is_active, records)

    return records
| 53 | + |
| 54 | + |
| 55 | +def _parse_section(section_body, is_active, records): |
| 56 | + """Parse a section (Current or Alumni) for role groups and entries.""" |
| 57 | + # Split by \newthought{Role} |
| 58 | + thought_pattern = r'\\newthought\{(.*?)\}' |
| 59 | + parts = re.split(thought_pattern, section_body) |
| 60 | + |
| 61 | + # parts: [before_first, role1, content1, role2, content2, ...] |
| 62 | + for i in range(1, len(parts), 2): |
| 63 | + role_category = parts[i].strip() |
| 64 | + role_content = parts[i + 1] if i + 1 < len(parts) else '' |
| 65 | + |
| 66 | + # Skip commented-out sections (all lines start with %) |
| 67 | + uncommented_lines = [ |
| 68 | + line for line in role_content.split('\n') |
| 69 | + if line.strip() and not line.strip().startswith('%') |
| 70 | + ] |
| 71 | + if not uncommented_lines: |
| 72 | + continue |
| 73 | + |
| 74 | + # Handle PI special case (no list wrapper) |
| 75 | + if role_category == 'PI': |
| 76 | + _parse_pi_entry(role_content, role_category, is_active, records) |
| 77 | + continue |
| 78 | + |
| 79 | + # Parse \item entries |
| 80 | + _parse_list_entries(role_content, role_category, is_active, records) |
| 81 | + |
| 82 | + |
| 83 | +def _parse_pi_entry(content, role_category, is_active, records): |
| 84 | + """Parse PI entry which has no list wrapper.""" |
| 85 | + # Format: \enskip Name (YYYY -- ) or just Name (YYYY -- ) |
| 86 | + pattern = r'(?:\\enskip\s+)?([A-Z][\w\s.]+?)\s*\((\d{4})\s*--\s*(\d{4})?\s*\)?' |
| 87 | + for match in re.finditer(pattern, content): |
| 88 | + name = match.group(1).strip() |
| 89 | + start_year = int(match.group(2)) |
| 90 | + end_year = int(match.group(3)) if match.group(3) else None |
| 91 | + records.append({ |
| 92 | + 'name': name, |
| 93 | + 'role_category': role_category, |
| 94 | + 'start_year': start_year, |
| 95 | + 'end_year': end_year, |
| 96 | + 'is_active': is_active and end_year is None, |
| 97 | + 'raw_line': match.group(0).strip(), |
| 98 | + }) |
| 99 | + |
| 100 | + |
| 101 | +def _parse_list_entries(content, role_category, is_active, records): |
| 102 | + """Parse \\item entries from list blocks.""" |
| 103 | + # Match \item Name (YYYY -- YYYY) or \item Name (YYYY) or \item Name (YYYY --) |
| 104 | + item_pattern = r'\\item\s+(.+?)\s*\((\d{4})(?:\s*--\s*(\d{4})?)?\s*\)' |
| 105 | + for line in content.split('\n'): |
| 106 | + stripped = line.strip() |
| 107 | + if stripped.startswith('%'): |
| 108 | + continue |
| 109 | + match = re.search(item_pattern, stripped) |
| 110 | + if match: |
| 111 | + name = match.group(1).strip() |
| 112 | + start_year = int(match.group(2)) |
| 113 | + end_str = match.group(3) |
| 114 | + end_year = int(end_str) if end_str else None |
| 115 | + records.append({ |
| 116 | + 'name': name, |
| 117 | + 'role_category': role_category, |
| 118 | + 'start_year': start_year, |
| 119 | + 'end_year': end_year, |
| 120 | + 'is_active': is_active and end_year is None, |
| 121 | + 'raw_line': stripped, |
| 122 | + }) |
| 123 | + |
| 124 | + |
def add_member_to_lab_manual(tex_path, name, role, start_year):
    """Add a new member to the Current lab members section.

    Appends ``\\item <name> (<start_year> -- )`` as the last item of the
    ``list`` environment that belongs to the role heading inside
    ``\\subsection{Current lab members}``, then rewrites the file in place.

    The search is confined to the Current subsection and to the block of the
    requested role (up to the next ``\\newthought`` heading), so a role that
    only exists under the alumni subsection is reported as missing instead of
    being edited.

    Args:
        tex_path: Path to lab_manual.tex.
        name: Full name of the member.
        role: Role category (e.g., 'Graduate Students', 'Undergraduate RAs').
            A few common short forms such as 'postdoc' or 'grad student' are
            mapped to the lab-manual headings; anything else is used verbatim.
        start_year: Start year as int.

    Raises:
        ValueError: If the role's list cannot be found under
            'Current lab members'. The file is left untouched in that case.
    """
    tex_path = Path(tex_path)
    content = tex_path.read_text(encoding='utf-8')

    # Map common role names to lab-manual role headings
    role_map = {
        'postdoc': 'Postdoctoral Researchers',
        'grad student': 'Graduate Students',
        'graduate student': 'Graduate Students',
        'undergrad': 'Undergraduate RAs',
        'undergraduate': 'Undergraduate RAs',
        'lab manager': 'Lab Managers',
        'research assistant': 'Research Assistants',
    }
    role_heading = role_map.get(role.lower(), role)

    new_item = f'\\item {name} ({start_year} -- )'

    not_found_message = (
        f"Could not find '{role_heading}' section under "
        f"'Current lab members' in {tex_path}"
    )

    # 1. Delimit the Current subsection: from its heading to the next
    #    \subsection heading (or end of file).
    section = re.search(r'\\subsection\{Current lab members\}', content)
    if not section:
        raise ValueError(not_found_message)
    next_section = re.compile(r'\\subsection\{').search(content, section.end())
    section_end = next_section.start() if next_section else len(content)

    # 2. Delimit the role block inside that subsection: from the role heading
    #    to the next \newthought heading (or end of the subsection).
    heading_re = re.compile(r'\\newthought\{' + re.escape(role_heading) + r'\}')
    heading = heading_re.search(content, section.end(), section_end)
    if not heading:
        raise ValueError(not_found_message)
    next_heading = re.compile(r'\\newthought\{').search(
        content, heading.end(), section_end
    )
    role_end = next_heading.start() if next_heading else section_end

    # 3. Find the list environment inside the role block only.
    list_re = re.compile(
        r'\\begin\{list\}\{\\quad\}\{\}(.*?)(\\end\{list\})', re.DOTALL
    )
    list_match = list_re.search(content, heading.end(), role_end)
    if not list_match:
        raise ValueError(not_found_message)

    # Insert new item before \end{list}, after trimming trailing whitespace
    # from the existing list body.
    new_content = (
        content[:list_match.start(1)]
        + list_match.group(1).rstrip()
        + '\n' + new_item + '\n'
        + content[list_match.start(2):]
    )
    tex_path.write_text(new_content, encoding='utf-8')
| 171 | + |
| 172 | + |
def move_member_to_alumni(tex_path, name, end_year):
    """Move a member from Current to Alumni section.

    Finds the member's open-ended ``\\item <name> (YYYY -- )`` line between
    ``\\subsection{Current lab members}`` and ``\\subsection{Lab alumni}``,
    removes that line, and appends ``\\item <name> (YYYY -- <end_year>)`` to
    the list of the same role under the alumni subsection. The role is the
    last ``\\newthought{...}`` heading that precedes the member's line. The
    file is rewritten only after every lookup has succeeded.

    Args:
        tex_path: Path to lab_manual.tex.
        name: Full name of the member.
        end_year: End year as int.

    Raises:
        ValueError: If the Current section, the member's entry, the member's
            role heading, or the matching alumni role list cannot be found.
            The file is left untouched in all of these cases.
    """
    tex_path = Path(tex_path)
    content = tex_path.read_text(encoding='utf-8')

    # Only match within Current lab members section
    current_section_match = re.search(
        r'\\subsection\{Current lab members\}(.*?)\\subsection\{Lab alumni\}',
        content, re.DOTALL
    )
    if not current_section_match:
        raise ValueError("Could not find Current lab members section")

    current_start = current_section_match.start(1)
    current_text = current_section_match.group(1)

    # Match the \item line with their name. The indentation is restricted to
    # spaces/tabs so the match always starts on the item's own line; a plain
    # \s* would also swallow blank lines above the item and delete them.
    item_pattern = re.compile(
        r'^[ \t]*\\item\s+' + re.escape(name) + r'\s*\((\d{4})\s*--\s*\)',
        re.MULTILINE
    )
    item_match = item_pattern.search(current_text)
    if not item_match:
        raise ValueError(f"Could not find '{name}' in Current lab members section")

    start_year = item_match.group(1)

    # Determine role category by finding the \newthought before this item
    preceding_roles = re.findall(
        r'\\newthought\{(.*?)\}', current_text[:item_match.start()]
    )
    if not preceding_roles:
        raise ValueError(f"Could not determine role for '{name}'")
    role_category = preceding_roles[-1]

    # Remove from current section: from the start of the match through the
    # end of that line, including its newline when there is one.
    line_start = current_start + item_match.start()
    newline_at = content.find('\n', current_start + item_match.end())
    line_end = len(content) if newline_at == -1 else newline_at + 1
    content = content[:line_start] + content[line_end:]

    # Add to alumni section with closed year range
    alumni_item = f'\\item {name} ({start_year} -- {end_year})'
    not_found_message = (
        f"Could not find '{role_category}' alumni section in {tex_path}"
    )

    # Delimit the alumni subsection. It is the first alumni heading after the
    # removed line, i.e. the one that closed the Current section above.
    alumni = re.compile(r'\\subsection\{Lab alumni\}').search(content, line_start)
    if not alumni:
        raise ValueError(not_found_message)
    next_section = re.compile(r'\\subsection\{').search(content, alumni.end())
    alumni_end = next_section.start() if next_section else len(content)

    # Delimit the role block inside the alumni subsection so the list that is
    # edited can never belong to a different role.
    heading_re = re.compile(r'\\newthought\{' + re.escape(role_category) + r'\}')
    heading = heading_re.search(content, alumni.end(), alumni_end)
    if not heading:
        raise ValueError(not_found_message)
    next_heading = re.compile(r'\\newthought\{').search(
        content, heading.end(), alumni_end
    )
    role_end = next_heading.start() if next_heading else alumni_end

    list_re = re.compile(
        r'\\begin\{list\}\{\\quad\}\{\}(.*?)(\\end\{list\})', re.DOTALL
    )
    list_match = list_re.search(content, heading.end(), role_end)
    if not list_match:
        raise ValueError(not_found_message)

    # Insert the closed-range item before \end{list}.
    content = (
        content[:list_match.start(1)]
        + list_match.group(1).rstrip()
        + '\n' + alumni_item + '\n'
        + content[list_match.start(2):]
    )

    tex_path.write_text(content, encoding='utf-8')
| 250 | + |
| 251 | + |
def commit_and_push_lab_manual(submodule_path, message, *, remote='origin', branch='master'):
    """Commit and push changes in the lab-manual submodule.

    Stages ``lab_manual.tex``, and if that leaves anything staged, commits
    with ``message`` and pushes to ``remote``/``branch``. Returns without
    committing when nothing is staged.

    Args:
        submodule_path: Path to the lab-manual submodule directory.
        message: Commit message.
        remote: Name of the git remote to push to (keyword-only).
        branch: Name of the branch to push (keyword-only).

    Raises:
        RuntimeError: If the submodule directory does not contain both a
            ``.git`` entry and ``lab_manual.tex``, or if a git operation
            fails.
    """
    submodule_path = Path(submodule_path)

    # Both must be present. Without a .git entry in this directory, git would
    # resolve the repository from a parent directory and the commit/push would
    # land in whatever repository encloses submodule_path.
    has_git = (submodule_path / '.git').exists()
    has_manual = (submodule_path / 'lab_manual.tex').exists()
    if not (has_git and has_manual):
        raise RuntimeError(
            f"Lab-manual submodule not initialized at {submodule_path}. "
            f"Run: git submodule update --init"
        )

    try:
        subprocess.run(
            ['git', 'add', 'lab_manual.tex'],
            cwd=submodule_path, check=True, capture_output=True, text=True
        )

        # Check if there are staged changes. With --quiet, git diff exits
        # with 0 when there are no differences and 1 when there are some.
        staged = subprocess.run(
            ['git', 'diff', '--cached', '--quiet'],
            cwd=submodule_path, capture_output=True, text=True
        )
        if staged.returncode == 0:
            return  # Nothing to commit
        if staged.returncode != 1:
            # Any other status is a git error, not "changes present".
            raise RuntimeError(
                f"Failed to commit/push lab-manual changes: "
                f"{staged.stderr or staged.stdout}"
            )

        subprocess.run(
            ['git', 'commit', '-m', message],
            cwd=submodule_path, check=True, capture_output=True, text=True
        )
        subprocess.run(
            ['git', 'push', remote, branch],
            cwd=submodule_path, check=True, capture_output=True, text=True
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to commit/push lab-manual changes: {e.stderr or e.stdout}"
        ) from e
0 commit comments