Skip to content

Commit 0ee5da8

Browse files
committed
Improve section parsing for unconventional README formats
Handle bold pseudo-headings, sub-heading groups, and code fence tracking
1 parent 8fac51c commit 0ee5da8

7 files changed

Lines changed: 544 additions & 31 deletions

File tree

hdi

Lines changed: 188 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,19 @@ parse_sections() {
127127
local body=""
128128
local prev_line=""
129129
local have_prev=false
130+
local ps_in_code=false
131+
local ps_fence_char=""
130132

131133
shopt -s nocasematch
132134

133-
# Helper: process a detected heading at a given level
135+
# Helper: process a detected heading at a given level.
136+
# Sets _ps_started_new=true when a new section is started, so the
137+
# caller knows whether non-matching sub-headings should be kept in body.
138+
_ps_started_new=false
139+
134140
_ps_handle_heading() {
135141
local text="$1" level="$2"
142+
_ps_started_new=false
136143
# Strip trailing ATX closing hashes
137144
if [[ "$text" =~ ^(.*[^#[:space:]])[[:space:]]*#+ ]]; then
138145
text="${BASH_REMATCH[1]}"
@@ -143,28 +150,73 @@ parse_sections() {
143150
while [[ "$text" =~ ^(.*)__([^_]+)__(.*) ]]; do text="${BASH_REMATCH[1]}${BASH_REMATCH[2]}${BASH_REMATCH[3]}"; done
144151
while [[ "$text" =~ ^(.*)_([^_]+)_(.*) ]]; do text="${BASH_REMATCH[1]}${BASH_REMATCH[2]}${BASH_REMATCH[3]}"; done
145152

146-
if $in_section && (( level <= section_level )); then
147-
SECTION_TITLES+=("$heading_text")
148-
SECTION_BODIES+=("$body")
149-
in_section=false
150-
body=""
153+
# Save the current section if a same-or-higher-level heading arrives,
154+
# or if a deeper-level heading also matches (avoids losing parent body)
155+
if $in_section; then
156+
if (( level <= section_level )); then
157+
SECTION_TITLES+=("$heading_text")
158+
SECTION_BODIES+=("$body")
159+
in_section=false
160+
body=""
161+
elif [[ "$text" =~ $PATTERN ]]; then
162+
# Deeper child heading also matches — save parent body first
163+
SECTION_TITLES+=("$heading_text")
164+
SECTION_BODIES+=("$body")
165+
in_section=false
166+
body=""
167+
fi
151168
fi
152169

153170
if [[ "$text" =~ $PATTERN ]]; then
154171
in_section=true
155172
section_level=$level
156173
heading_text="$text"
157174
body=""
175+
_ps_started_new=true
158176
fi
159177
}
160178

161179
while IFS= read -r line; do
180+
# Track fenced code blocks so comments/content inside them
181+
# are not misinterpreted as headings
182+
if [[ "$line" =~ ^[[:space:]]*(\`{3,}|~{3,}) ]]; then
183+
local ps_matched="${BASH_REMATCH[1]}"
184+
if $ps_in_code; then
185+
[[ "${ps_matched:0:1}" == "$ps_fence_char" ]] && ps_in_code=false && ps_fence_char=""
186+
else
187+
ps_in_code=true
188+
ps_fence_char="${ps_matched:0:1}"
189+
fi
190+
# Still include fence lines in body if we're in a section
191+
if $have_prev; then
192+
if $in_section; then body+="$prev_line"$'\n'; fi
193+
have_prev=false; prev_line=""
194+
fi
195+
if $in_section; then body+="$line"$'\n'; fi
196+
continue
197+
fi
198+
199+
# Inside a code block — skip heading detection, just accumulate body
200+
if $ps_in_code; then
201+
if $have_prev; then
202+
if $in_section; then body+="$prev_line"$'\n'; fi
203+
have_prev=false; prev_line=""
204+
fi
205+
if $in_section; then body+="$line"$'\n'; fi
206+
continue
207+
fi
208+
162209
# Check for setext underline: current line is ===+ or ---+ (3+ chars)
163210
if $have_prev && [[ "$line" =~ ^[[:space:]]*(={3,}|-{3,})[[:space:]]*$ ]]; then
164211
local setext_char="${BASH_REMATCH[1]:0:1}"
165212
local level=2
166213
[[ "$setext_char" == "=" ]] && level=1
167214
_ps_handle_heading "$prev_line" "$level"
215+
# Keep non-matching setext sub-headings in body (as ATX) for sub-grouping
216+
if $in_section && ! $_ps_started_new; then
217+
if (( level == 1 )); then body+="# $prev_line"$'\n'
218+
else body+="## $prev_line"$'\n'; fi
219+
fi
168220
have_prev=false
169221
prev_line=""
170222
continue
@@ -177,6 +229,22 @@ parse_sections() {
177229
local level=${#hashes}
178230
local text="${BASH_REMATCH[2]}"
179231
_ps_handle_heading "$text" "$level"
232+
# Keep non-matching ATX sub-headings in body for sub-grouping
233+
if $in_section && ! $_ps_started_new; then
234+
body+="$prev_line"$'\n'
235+
fi
236+
elif [[ "$prev_line" =~ ^[[:space:]]*\*\*([^*]+)\*\*[[:space:]]*$ ]]; then
237+
# Bold pseudo-heading: when not inside a real heading's section,
238+
# check against keywords so e.g. **Run application** matches run mode.
239+
# When inside a real section, keep it in body for sub-group display.
240+
local bold_text="${BASH_REMATCH[1]}"
241+
_ps_started_new=false
242+
if ! $in_section || (( section_level == 7 )); then
243+
_ps_handle_heading "$bold_text" 7
244+
fi
245+
if $in_section && ! $_ps_started_new; then
246+
body+="$prev_line"$'\n'
247+
fi
180248
elif $in_section; then
181249
body+="$prev_line"$'\n'
182250
fi
@@ -187,12 +255,24 @@ parse_sections() {
187255
done
188256

189257
# Process final buffered line
190-
if $have_prev; then
258+
if $have_prev && ! $ps_in_code; then
191259
if [[ "$prev_line" =~ ^(#{1,6})[[:space:]]+(.*) ]]; then
192260
local hashes="${BASH_REMATCH[1]}"
193261
local level=${#hashes}
194262
local text="${BASH_REMATCH[2]}"
195263
_ps_handle_heading "$text" "$level"
264+
if $in_section && ! $_ps_started_new; then
265+
body+="$prev_line"$'\n'
266+
fi
267+
elif [[ "$prev_line" =~ ^[[:space:]]*\*\*([^*]+)\*\*[[:space:]]*$ ]]; then
268+
local bold_text="${BASH_REMATCH[1]}"
269+
_ps_started_new=false
270+
if ! $in_section || (( section_level == 7 )); then
271+
_ps_handle_heading "$bold_text" 7
272+
fi
273+
if $in_section && ! $_ps_started_new; then
274+
body+="$prev_line"$'\n'
275+
fi
196276
elif $in_section; then
197277
body+="$prev_line"$'\n'
198278
fi
@@ -275,6 +355,15 @@ strip_prompt() {
275355
fi
276356
}
277357

358+
# Extract commands from section body text.
359+
# Sets _EC_RESULT instead of printing to stdout (avoids subshell per call).
360+
#
361+
# When _EC_GROUPED=true, ATX sub-headings (## through ######) and bold pseudo-headings
362+
# (**text**) in the prose are emitted as marker lines prefixed with \x01
363+
# so the caller can create display sub-groups without a second pass.
364+
_EC_GROUPED=false
365+
_EC_SUBHDR=$'\x01'
366+
278367
extract_commands() {
279368
local content="$1"
280369
local commands=""
@@ -284,6 +373,7 @@ extract_commands() {
284373
local continuation_buf=""
285374
local console_mode=false
286375
local indented_buf=""
376+
local line iline inline_cmds inline_cmd
287377

288378
while IFS= read -r line; do
289379
if [[ "$line" =~ ^[[:space:]]*(\`{3,}|~{3,}) ]]; then
@@ -346,6 +436,22 @@ extract_commands() {
346436
commands+="$stripped"$'\n'
347437
fi
348438
elif ! $in_code; then
439+
# Detect sub-headings and bold pseudo-headings for display grouping
440+
if $_EC_GROUPED; then
441+
if [[ "$line" =~ ^#{2,6}[[:space:]]+(.*) ]]; then
442+
local _ec_label="${BASH_REMATCH[1]}"
443+
if [[ "$_ec_label" =~ ^(.*[^#[:space:]])[[:space:]]*#+ ]]; then
444+
_ec_label="${BASH_REMATCH[1]}"
445+
fi
446+
while [[ "$_ec_label" =~ ^(.*)\*\*([^*]+)\*\*(.*) ]]; do _ec_label="${BASH_REMATCH[1]}${BASH_REMATCH[2]}${BASH_REMATCH[3]}"; done
447+
while [[ "$_ec_label" =~ ^(.*)\*([^*]+)\*(.*) ]]; do _ec_label="${BASH_REMATCH[1]}${BASH_REMATCH[2]}${BASH_REMATCH[3]}"; done
448+
commands+="${_EC_SUBHDR}${_ec_label}"$'\n'
449+
# Fall through to check inline backtick cmds in heading text
450+
elif [[ "$line" =~ ^[[:space:]]*\*\*([^*]+)\*\*[[:space:]]*$ ]]; then
451+
commands+="${_EC_SUBHDR}${BASH_REMATCH[1]}"$'\n'
452+
continue
453+
fi
454+
fi
349455
# Detect indented code blocks (4+ spaces or tab)
350456
if [[ "$line" =~ ^(\ {4,}| ) ]] && [[ -n "${line//[[:space:]]/}" ]]; then
351457
local dedented="${line# }"
@@ -370,7 +476,7 @@ extract_commands() {
370476
fi
371477

372478
find_backtick_commands "$line"
373-
local inline_cmds="$_FBC_RESULT"
479+
inline_cmds="$_FBC_RESULT"
374480
if [[ -n "$inline_cmds" ]]; then
375481
while IFS= read -r inline_cmd; do
376482
commands+="$inline_cmd"$'\n'
@@ -395,63 +501,103 @@ extract_commands() {
395501
indented_buf=""
396502
fi
397503

398-
printf '%s' "$commands"
504+
_EC_RESULT="$commands"
399505
}
400506

401507
# ── Build a flat list: section headers + commands ────────────────────────────
402-
# Each entry is either a "header" or a "command". We store parallel arrays
403-
# for the display lines, their types, and (for commands) the actual command.
508+
# Each entry is one of "header", "subheader", "command", or "empty".
509+
# We store parallel arrays for the display lines, their types, and
510+
# (for commands) the actual command.
404511

405512
declare -a DISPLAY_LINES=() # what to print
406-
declare -a LINE_TYPES=() # "header" | "command" | "empty"
513+
declare -a LINE_TYPES=() # "header" | "subheader" | "command" | "empty"
407514
declare -a LINE_CMDS=() # the raw command (only for type=command)
408515
declare -a CMD_INDICES=() # indices into DISPLAY_LINES that are commands
409516

410517
build_display_list() {
411518
for i in "${!SECTION_TITLES[@]}"; do
412519
local title="${SECTION_TITLES[$i]}"
413520
local body="${SECTION_BODIES[$i]}"
414-
local cmds
415-
cmds=$(extract_commands "$body")
521+
522+
# Section header
523+
DISPLAY_LINES+=("$title")
524+
LINE_TYPES+=("header")
525+
LINE_CMDS+=("")
416526

417527
# Extract commands from backtick-wrapped text in the heading itself
418528
# (eg. ### `yarn start` — the command is the heading)
419529
find_backtick_commands "$title" false
420530
local title_cmds="$_FBC_RESULT"
531+
local has_cmds=false
421532
if [[ -n "$title_cmds" ]]; then
422533
while IFS= read -r tcmd; do
423-
cmds="$tcmd"$'\n'"$cmds"
534+
[[ -z "$tcmd" ]] && continue
535+
has_cmds=true
536+
CMD_INDICES+=("${#DISPLAY_LINES[@]}")
537+
DISPLAY_LINES+=("$tcmd")
538+
LINE_TYPES+=("command")
539+
LINE_CMDS+=("$tcmd")
424540
done <<< "$title_cmds"
425541
fi
426542

427-
# Deduplicate commands (pure bash, no awk subprocess)
543+
# Extract commands with sub-group markers (single pass over the body).
544+
# Sub-headings and bold pseudo-headings produce \x01-prefixed marker
545+
# lines; everything else is a command or empty line.
546+
_EC_GROUPED=true
547+
extract_commands "$body"
548+
_EC_GROUPED=false
549+
local cmds="$_EC_RESULT"
550+
551+
# Deduplicate commands within each sub-group (pure bash, no awk)
428552
if [[ -n "$cmds" ]]; then
429-
local _deduped="" _dup
553+
local _deduped="" _dup _cur_group="" _group_seen=""
430554
while IFS= read -r _cmd; do
431555
[[ -z "$_cmd" ]] && continue
556+
# Sub-header marker — reset per-group seen list
557+
if [[ "$_cmd" == "$_EC_SUBHDR"* ]]; then
558+
_deduped+="$_cmd"$'\n'
559+
_group_seen=""
560+
continue
561+
fi
432562
_dup=false
433-
while IFS= read -r _existing; do
434-
[[ "$_existing" == "$_cmd" ]] && _dup=true && break
435-
done <<< "$_deduped"
436-
$_dup || _deduped+="$_cmd"$'\n'
563+
if [[ -n "$_group_seen" ]]; then
564+
while IFS= read -r _existing; do
565+
[[ "$_existing" == "$_cmd" ]] && _dup=true && break
566+
done <<< "$_group_seen"
567+
fi
568+
if ! $_dup; then
569+
_deduped+="$_cmd"$'\n'
570+
_group_seen+="$_cmd"$'\n'
571+
fi
437572
done <<< "$cmds"
438573
cmds="$_deduped"
439574
fi
440575

441-
# Section header
442-
DISPLAY_LINES+=("$title")
443-
LINE_TYPES+=("header")
444-
LINE_CMDS+=("")
445-
576+
# Build display entries from the flat command list with markers
446577
if [[ -n "$cmds" ]]; then
447-
while IFS= read -r cmd; do
448-
[[ -z "$cmd" ]] && continue
578+
local _pending_label=""
579+
while IFS= read -r _entry; do
580+
[[ -z "$_entry" ]] && continue
581+
if [[ "$_entry" == "$_EC_SUBHDR"* ]]; then
582+
_pending_label="${_entry:1}"
583+
continue
584+
fi
585+
# Emit the sub-header only when its group has commands
586+
if [[ -n "$_pending_label" ]]; then
587+
DISPLAY_LINES+=("$_pending_label")
588+
LINE_TYPES+=("subheader")
589+
LINE_CMDS+=("")
590+
_pending_label=""
591+
fi
592+
has_cmds=true
449593
CMD_INDICES+=("${#DISPLAY_LINES[@]}")
450-
DISPLAY_LINES+=("$cmd")
594+
DISPLAY_LINES+=("$_entry")
451595
LINE_TYPES+=("command")
452-
LINE_CMDS+=("$cmd")
596+
LINE_CMDS+=("$_entry")
453597
done <<< "$cmds"
454-
else
598+
fi
599+
600+
if ! $has_cmds; then
455601
DISPLAY_LINES+=("(no commands — use --full to see prose)")
456602
LINE_TYPES+=("empty")
457603
LINE_CMDS+=("")
@@ -478,6 +624,13 @@ render_static() {
478624
printf "\n%s%s ▸ %s%s\n" "$BOLD" "$CYAN" "$line" "$RESET"
479625
fi
480626
;;
627+
subheader)
628+
if $RAW; then
629+
printf "\n### %s\n" "$line"
630+
else
631+
printf "\n %s%s%s%s\n" "$BOLD" "$MAGENTA" "$line" "$RESET"
632+
fi
633+
;;
481634
command)
482635
if $RAW; then
483636
printf "%s\n" "$line"
@@ -648,6 +801,10 @@ draw_picker() {
648801
printf "\n%s%s ▸ %s%s\n" "$BOLD" "$CYAN" "$line" "$RESET"
649802
(( count += 2 ))
650803
;;
804+
subheader)
805+
printf "\n %s%s%s%s\n" "$BOLD" "$MAGENTA" "$line" "$RESET"
806+
(( count += 2 ))
807+
;;
651808
command)
652809
if (( idx == selected )) && [[ -n "$FLASH_MSG" ]]; then
653810
printf " %s%s✔ %s %s\n" "$BG_SELECT" "$GREEN$BOLD" "$line" "$RESET"
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Training App
2+
3+
## Set up
4+
5+
**Install dependencies**
6+
7+
```bash
8+
npm install
9+
```
10+
11+
**Configure environment**
12+
13+
```bash
14+
cp .env.example .env
15+
```
16+
17+
**Build the application**
18+
19+
```bash
20+
npm run build
21+
```
22+
23+
**Start the server**
24+
25+
```bash
26+
npm start
27+
```
28+
29+
## Testing
30+
31+
**Unit tests**
32+
33+
```bash
34+
npm test
35+
```
36+
37+
**Integration tests**
38+
39+
```bash
40+
npm run test:integration
41+
```

0 commit comments

Comments
 (0)