From e86911d498cdeb74b797ef0e6367ff628a1cfa16 Mon Sep 17 00:00:00 2001 From: AlanAAG Date: Mon, 1 Jun 2026 19:38:05 -0600 Subject: [PATCH 1/3] pdf actions new version --- app/data/action/create_pdf.py | 131 ++++++------------------ app/data/action/edit_pdf.py | 67 +++++++----- app/utils/pdf_format.py | 187 ++++++++++++++++++++++++++++++++++ 3 files changed, 263 insertions(+), 122 deletions(-) create mode 100644 app/utils/pdf_format.py diff --git a/app/data/action/create_pdf.py b/app/data/action/create_pdf.py index 1045c2b2..b6a6ebe6 100644 --- a/app/data/action/create_pdf.py +++ b/app/data/action/create_pdf.py @@ -8,9 +8,8 @@ "Supports headings (# to #####), paragraphs, bullet and numbered lists, " "bold, italic, inline code, fenced code blocks, tables, strikethrough, " "blockquotes, and horizontal rules. " - "The first # heading is rendered as a gradient banner header. " - "Available themes: default (indigo), corporate (blue), minimal (grey), " - "warm (amber), forest (green). " + "The first # heading is rendered as a banner header. " + "Colours, typography, and margins are read from FORMAT.md at render time. " "Use absolute paths only." ), mode="CLI", @@ -39,19 +38,6 @@ "and ~~strikethrough~~." ), }, - "theme": { - "type": "string", - "example": "default", - "description": ( - "Visual colour theme. One of: " - "default (indigo) — general use; " - "corporate (blue) — business, finance, formal reports; " - "minimal (grey) — academic, technical, low-decoration; " - "warm (amber) — creative, personal, informal; " - "forest (green) — sustainability, nature, environmental. " - "Defaults to 'default'." - ), - }, "subtitle": { "type": "string", "example": "Confidential - Internal Use Only", @@ -89,10 +75,11 @@ }, "theme_used": { "type": "string", - "example": "corporate", + "example": "format_md", "description": ( - "The theme that was applied. Useful for downstream actions " - "(e.g. edit_pdf) that need to match colours to the document style." + "Always 'format_md'. Styling is derived from FORMAT.md " + "(accent=#FF4F18, base=#141517, muted=#6B6E76). " + "Useful for downstream actions (e.g. edit_pdf) that need to match colours." ), }, "message": { @@ -116,7 +103,6 @@ def create_pdf_file(input_data: dict) -> dict: simulated_mode = bool(input_data.get("simulated_mode", False)) file_path = str(input_data.get("file_path", "")).strip() content = str(input_data.get("content", "")).strip() - theme = str(input_data.get("theme", "default")).strip().lower() subtitle = str(input_data.get("subtitle", "")).strip() page_numbers = bool(input_data.get("page_numbers", True)) @@ -141,7 +127,7 @@ def create_pdf_file(input_data: dict) -> dict: } if simulated_mode: - return {"status": "success", "path": file_path} + return {"status": "success", "path": file_path, "theme_used": "format_md"} # ── Imports (executor pre-installs via requirement=, this is a fallback) ── import os @@ -168,70 +154,13 @@ def _ensure(pkg, import_as=None): from fpdf import FPDF from fpdf.fonts import TextStyle, FontFace from fpdf.pattern import LinearGradient + from app.config import AGENT_FILE_SYSTEM_PATH + from app.utils.pdf_format import load_style, build_theme as _build_theme - # ── Themes ──────────────────────────────────────────────────────────── - # Keys: hbg=gradient stop colours, accent=link/highlight colour, - # h2/h3=heading colours, body=body text, cbg/cc=code bg/fg, - # rule=accent rule below banner, htxt=banner text - _THEMES = { - "default": { - "hbg": [(30, 58, 138), (79, 70, 229)], - "accent": (79, 70, 229), - "h2": (30, 58, 138), - "h3": (55, 65, 81), - "body": (31, 41, 55), - "cbg": (243, 244, 246), - "cc": (17, 24, 39), - "rule": (199, 210, 254), - "htxt": (255, 255, 255), - }, - "corporate": { - "hbg": [(0, 72, 148), (0, 120, 212)], - "accent": (0, 120, 212), - "h2": (0, 72, 148), - "h3": (60, 60, 100), - "body": (31, 41, 55), - "cbg": (240, 247, 255), - "cc": (0, 72, 148), - "rule": (173, 216, 230), - "htxt": (255, 255, 255), - }, - "minimal": { - "hbg": [(50, 50, 50), (90, 90, 90)], - "accent": (80, 80, 80), - "h2": (40, 40, 40), - "h3": (80, 80, 80), - "body": (40, 40, 40), - "cbg": (245, 245, 245), - "cc": (30, 30, 30), - "rule": (200, 200, 200), - "htxt": (255, 255, 255), - }, - "warm": { - "hbg": [(120, 53, 15), (217, 119, 6)], - "accent": (180, 83, 9), - "h2": (120, 53, 15), - "h3": (92, 72, 44), - "body": (41, 37, 36), - "cbg": (255, 247, 237), - "cc": (120, 53, 15), - "rule": (253, 186, 116), - "htxt": (255, 255, 255), - }, - "forest": { - "hbg": [(20, 83, 45), (34, 197, 94)], - "accent": (22, 163, 74), - "h2": (20, 83, 45), - "h3": (55, 65, 55), - "body": (31, 41, 31), - "cbg": (240, 253, 244), - "cc": (20, 83, 45), - "rule": (134, 239, 172), - "htxt": (255, 255, 255), - }, - } - t = _THEMES.get(theme, _THEMES["default"]) - theme = theme if theme in _THEMES else "default" # resolve fallback for theme_used + # ── Style resolved from FORMAT.md (falls back to CraftBot brand defaults) ── + _fmt = load_style(AGENT_FILE_SYSTEM_PATH / "FORMAT.md") + t = _build_theme(_fmt) + _MARGIN_MM = _fmt["margin_in"] * 25.4 # ── Unicode sanitizer ───────────────────────────────────────────────── # fpdf2's built-in fonts (Helvetica, Courier, Times) only cover latin-1 @@ -317,8 +246,8 @@ def _sanitize(text): # FPDF setup pdf = FPDF() - pdf.set_auto_page_break(auto=True, margin=22) - pdf.set_margins(left=20, top=15, right=20) + pdf.set_auto_page_break(auto=True, margin=_MARGIN_MM) + pdf.set_margins(left=_MARGIN_MM, top=_MARGIN_MM, right=_MARGIN_MM) if doc_title: pdf.set_title(doc_title) pdf.set_creator("CraftBot") @@ -327,7 +256,11 @@ def _sanitize(text): pw = pdf.w - pdf.l_margin - pdf.r_margin # usable page width lm = pdf.l_margin y0 = 8 # banner top y-position - HH = 50 if subtitle else 40 # banner height + # Banner height: scale with FORMAT.md header_height_in but floor at 30mm + # so the title text always fits. FORMAT.md's 0.4" is a nav-bar spec; the + # PDF banner is a title block that needs proportionally more space. + _BASE_H = max(round(_fmt["header_height_in"] * 25.4 * 2.5), 30) + HH = _BASE_H + (10 if subtitle else 0) # ── Gradient banner ─────────────────────────────────────────────── grad = LinearGradient(lm, y0, lm + pw, y0, colors=t["hbg"]) @@ -335,7 +268,7 @@ def _sanitize(text): pdf.rect(lm, y0, pw, HH, style="F") if doc_title: - pdf.set_font("Helvetica", "B", 20) + pdf.set_font("Helvetica", "B", _fmt["h1_pt"]) pdf.set_text_color(*t["htxt"]) title_y = y0 + (HH - 20) / 2 - (5 if subtitle else 0) pdf.set_xy(lm + 8, title_y) @@ -358,7 +291,7 @@ def _sanitize(text): "h1": TextStyle( font_family="Helvetica", font_style="B", - font_size_pt=20, + font_size_pt=_fmt["h1_pt"], color=t["h2"], t_margin=10, b_margin=3, @@ -366,7 +299,7 @@ def _sanitize(text): "h2": TextStyle( font_family="Helvetica", font_style="B", - font_size_pt=16, + font_size_pt=_fmt["h2_pt"], color=t["h2"], t_margin=8, b_margin=2, @@ -374,7 +307,7 @@ def _sanitize(text): "h3": TextStyle( font_family="Helvetica", font_style="B", - font_size_pt=13, + font_size_pt=_fmt["h3_pt"], color=t["h3"], t_margin=6, b_margin=2, @@ -382,7 +315,7 @@ def _sanitize(text): "h4": TextStyle( font_family="Helvetica", font_style="BI", - font_size_pt=11, + font_size_pt=_fmt["body_pt"], color=t["h3"], t_margin=4, b_margin=1, @@ -390,20 +323,20 @@ def _sanitize(text): "h5": TextStyle( font_family="Helvetica", font_style="I", - font_size_pt=10, + font_size_pt=_fmt["small_pt"], color=t["h3"], t_margin=3, b_margin=1, ), "code": TextStyle( font_family="Courier", - font_size_pt=9, + font_size_pt=_fmt["code_pt"], color=t["cc"], fill_color=t["cbg"], ), "pre": TextStyle( font_family="Courier", - font_size_pt=9, + font_size_pt=_fmt["code_pt"], color=t["cc"], fill_color=t["cbg"], ), @@ -411,7 +344,7 @@ def _sanitize(text): } pdf.set_text_color(*t["body"]) - pdf.set_font("Helvetica", size=11) + pdf.set_font("Helvetica", size=_fmt["body_pt"]) pdf.write_html( html_body, font_family="Helvetica", @@ -426,8 +359,8 @@ def _sanitize(text): for pg in range(1, n_pages + 1): pdf.page = pg pdf.set_y(-12) - pdf.set_font("Helvetica", "I", 8) - pdf.set_text_color(150, 150, 150) + pdf.set_font("Helvetica", "I", _fmt["small_pt"]) + pdf.set_text_color(*_fmt["muted"]) pdf.cell(0, 5, f"Page {pg} of {n_pages}", align="C") # ── Write to disk ───────────────────────────────────────────────── @@ -442,7 +375,7 @@ def _sanitize(text): "path": abs_path, "pages": n_pages, "size_bytes": os.path.getsize(abs_path), - "theme_used": theme, + "theme_used": "format_md", } except PermissionError as exc: diff --git a/app/data/action/edit_pdf.py b/app/data/action/edit_pdf.py index cd3232d1..c3d04934 100644 --- a/app/data/action/edit_pdf.py +++ b/app/data/action/edit_pdf.py @@ -14,9 +14,9 @@ "For tasks that require text reflow (rephrasing paragraphs, inserting new sections, " "reformatting layout): use create_pdf to rebuild the document with changes applied — " "the user receives the same output path with a clean result. " - "When editing a PDF created by create_pdf, use the theme_used value from that call " - "to pick matching accent colours: default=#4f46e5, corporate=#0078d4, " - "minimal=#505050, warm=#b45309, forest=#16a34a. " + "When editing a PDF created by create_pdf, the accent colour is always #FF4F18 " + "(FORMAT.md highlight). Use this for stamps, watermarks, and annotations " + "to match the document style. " "Use absolute paths only." ), mode="CLI", @@ -586,10 +586,11 @@ def _get_span_at_rect(page, target_rect): # ── fill_field (AcroForm via pypdf) ─────────────────────── elif op_type == "fill_field": - # Defer all fill_field ops to after PyMuPDF saves - # (pypdf needs to open the saved file) - # We flag these for post-processing below - pass # handled in post-processing step + # Validate shape up-front so missing field_name is caught + # immediately, even if post-processing later fails wholesale. + if not str(op.get("field_name", "")).strip(): + warnings.append(f"{op_tag}: 'field_name' is required.") + # Actual fill is deferred — see post-processing block below. else: warnings.append(f"{op_tag}: unknown operation type '{op_type}'.") @@ -610,36 +611,56 @@ def _get_span_at_rect(page, target_rect): # ── Post-process: AcroForm fill_field via pypdf ─────────────────── acroform_ops = [ - op for op in operations if str(op.get("type", "")).lower() == "fill_field" + (j, op) + for j, op in enumerate(operations) + if str(op.get("type", "")).lower() == "fill_field" ] if acroform_ops: + # Step 1: open the saved file — failure here means all fill_field + # ops failed for the same upstream reason, warn per-op. try: reader = pypdf.PdfReader(abs_output) writer = pypdf.PdfWriter() writer.append(reader) existing_fields = reader.get_fields() or {} - for op in acroform_ops: - op_tag = "op[fill_field]" - field_name = str(op.get("field_name", "")) + except Exception as e: + for j, op in acroform_ops: + op_tag = f"op[{j}] 'fill_field'" + warnings.append( + f"{op_tag}: could not open PDF for AcroForm processing: " + f"{type(e).__name__}: {e}." + ) + else: + # Step 2: apply each fill_field op individually so failures + # are isolated — one bad field does not block the others. + for j, op in acroform_ops: + op_tag = f"op[{j}] 'fill_field'" + field_name = str(op.get("field_name", "")).strip() value = str(op.get("value", "")) if not field_name: - warnings.append(f"{op_tag}: 'field_name' is required.") - continue + continue # already warned in main loop validation if field_name not in existing_fields: warnings.append( f"{op_tag}: field '{field_name}' not found in AcroForm. " - f"Available fields: {list(existing_fields.keys())[:10]}." + f"Available: {list(existing_fields.keys())[:10]}." ) continue - for page_obj in writer.pages: - writer.update_page_form_field_values( - page_obj, {field_name: value} - ) - ops_done += 1 - with open(abs_output, "wb") as f: - writer.write(f) - except Exception as e: - warnings.append(f"AcroForm fill failed: {type(e).__name__}: {e}.") + try: + for page_obj in writer.pages: + writer.update_page_form_field_values( + page_obj, {field_name: value} + ) + ops_done += 1 + except Exception as e: + warnings.append(f"{op_tag}: {type(e).__name__}: {e}.") + + # Step 3: write result — isolated so a disk failure does not + # hide which fields were successfully processed. + try: + with open(abs_output, "wb") as f: + writer.write(f) + except Exception as e: + warnings.append(f"AcroForm write failed: {type(e).__name__}: {e}.") return _json( "success", diff --git a/app/utils/pdf_format.py b/app/utils/pdf_format.py new file mode 100644 index 00000000..a058d350 --- /dev/null +++ b/app/utils/pdf_format.py @@ -0,0 +1,187 @@ +"""FORMAT.md → PDF style resolver for create_pdf and edit_pdf.""" + +from __future__ import annotations + +import re +from pathlib import Path + +# Built-in CraftBot-brand defaults — used when FORMAT.md is absent or unparseable. +# Values mirror FORMAT.md's current ## global / ## pdf sections. +_DEFAULTS: dict = { + "base": (20, 21, 23), # #141517 + "highlight": (255, 79, 24), # #FF4F18 + "muted": (107, 110, 118), # #6B6E76 + "border": (46, 47, 51), # #2E2F33 + "surface": (30, 31, 34), # #1E1F22 + "light_grey": (244, 244, 245), # #F4F4F5 + "white": (255, 255, 255), + "h1_pt": 24.0, + "h2_pt": 17.0, + "h3_pt": 13.0, + "body_pt": 11.0, + "code_pt": 10.0, + "small_pt": 9.0, + "margin_in": 1.0, + "header_height_in": 0.4, +} + + +def _hex_to_rgb(hex_val: str) -> tuple[int, int, int] | None: + h = str(hex_val).lstrip("#") + if len(h) == 3: + h = "".join(c * 2 for c in h) + if len(h) != 6: + return None + try: + return (int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)) + except ValueError: + return None + + +def _section(text: str, name: str) -> str: + """Return the body of a ## section up to the next ## heading.""" + pat = rf"^##\s+{re.escape(name)}\b(.*?)(?=^##\s|\Z)" + m = re.search(pat, text, re.IGNORECASE | re.MULTILINE | re.DOTALL) + return m.group(1) if m else "" + + +def _parse_colors(block: str) -> dict[str, tuple[int, int, int]]: + """Extract named hex colors from a text block. + + Finds lines that name a known color role and contain a #rrggbb value. + """ + # Longer tokens must come before their substrings so they match first and + # prevent a line like "Highlight hover: #E64615" from being stored as highlight. + name_map = { + "highlight hover": None, # consume hover variant; None = skip + "highlight_hover": None, + "highlight": "highlight", + "base": "base", + "muted": "muted", + "border": "border", + "surface": "surface", + "light grey": "light_grey", + "light gray": "light_grey", + "white": "white", + } + out: dict = {} + for line in block.splitlines(): + hexes = re.findall(r"#[0-9a-fA-F]{6}\b", line) + if not hexes: + continue + ll = line.lower() + for token, key in name_map.items(): + if token in ll: + if key is not None: + rgb = _hex_to_rgb(hexes[0]) + if rgb: + out[key] = rgb + break + return out + + +def _parse_pt(text: str) -> float | None: + """Parse '22-26pt', '22–26pt', or '22pt' → midpoint float.""" + m = re.search(r"(\d+)(?:[-–](\d+))?pt", text) + if not m: + return None + lo = float(m.group(1)) + hi = float(m.group(2)) if m.group(2) else lo + return (lo + hi) / 2.0 + + +def _parse_inches(text: str) -> float | None: + """Return the first N\" value in a string as a float.""" + m = re.search(r'(\d+(?:\.\d+)?)\s*"', text) + return float(m.group(1)) if m else None + + +def load_style(format_md_path: Path | str | None = None) -> dict: + """Parse FORMAT.md and return a resolved style dict. + + Always returns a complete dict with every key from _DEFAULTS populated. + Any missing or unparseable value falls back to the built-in CraftBot defaults. + """ + style = dict(_DEFAULTS) + if format_md_path is None: + return style + path = Path(format_md_path) + if not path.is_file(): + return style + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + return style + + global_block = _section(text, "global") + pdf_block = _section(text, "pdf") + + # Colors from ## global + for key, val in _parse_colors(global_block).items(): + style[key] = val + + # Typography pt sizes from ## global — only table rows (lines containing |) + role_map = { + "h1": "h1_pt", + "h2": "h2_pt", + "h3": "h3_pt", + "body": "body_pt", + "code": "code_pt", + "small": "small_pt", + "caption": "small_pt", + } + for line in global_block.splitlines(): + if "|" not in line: + continue + ll = line.lower() + for role, key in role_map.items(): + if role in ll: + pt = _parse_pt(line) + if pt: + style[key] = pt + break + + # Margin from ## pdf — look for "N" all sides" first, then fall back to + # the first inch value that follows the word "margins?" on the line. + # This avoids capturing the paper size (e.g. "8.5"") that appears on the + # same line as "Margins: 1" all sides." + for line in pdf_block.splitlines(): + if "margin" in line.lower(): + m = re.search(r'(\d+(?:\.\d+)?)\s*"\s+all', line, re.IGNORECASE) + if m: + style["margin_in"] = float(m.group(1)) + break + after = re.search(r"margins?\W+(.*)", line, re.IGNORECASE) + if after: + v = _parse_inches(after.group(1)) + if v: + style["margin_in"] = v + break + + # Header bar height from ## pdf + for line in pdf_block.splitlines(): + if "header" in line.lower() and '"' in line: + v = _parse_inches(line) + if v: + style["header_height_in"] = v + break + + return style + + +def build_theme(s: dict) -> dict: + """Map a FORMAT.md style dict to the theme dict consumed by create_pdf's render pipeline.""" + return { + "hbg": [ + s["base"], + s["base"], + ], # solid header bar (FORMAT.md specifies no gradient) + "accent": s["highlight"], + "h2": s["base"], + "h3": s["muted"], + "body": s["base"], + "cbg": s["light_grey"], + "cc": s["base"], + "rule": s["highlight"], # orange accent rule below header banner + "htxt": s["white"], + } From b04dfe914b273c4eb2168717a82b48c919ae3def Mon Sep 17 00:00:00 2001 From: AlanAAG Date: Mon, 1 Jun 2026 19:58:16 -0600 Subject: [PATCH 2/3] Fix banner title centering and edit_pdf accent colour description --- app/data/action/create_pdf.py | 2 +- app/data/action/edit_pdf.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/app/data/action/create_pdf.py b/app/data/action/create_pdf.py index b6a6ebe6..b32c8d12 100644 --- a/app/data/action/create_pdf.py +++ b/app/data/action/create_pdf.py @@ -270,7 +270,7 @@ def _sanitize(text): if doc_title: pdf.set_font("Helvetica", "B", _fmt["h1_pt"]) pdf.set_text_color(*t["htxt"]) - title_y = y0 + (HH - 20) / 2 - (5 if subtitle else 0) + title_y = y0 + (HH - 12) / 2 - (5 if subtitle else 0) pdf.set_xy(lm + 8, title_y) pdf.cell(pw - 16, 12, doc_title[:72], align="L") diff --git a/app/data/action/edit_pdf.py b/app/data/action/edit_pdf.py index c3d04934..e9e0f973 100644 --- a/app/data/action/edit_pdf.py +++ b/app/data/action/edit_pdf.py @@ -14,9 +14,8 @@ "For tasks that require text reflow (rephrasing paragraphs, inserting new sections, " "reformatting layout): use create_pdf to rebuild the document with changes applied — " "the user receives the same output path with a clean result. " - "When editing a PDF created by create_pdf, the accent colour is always #FF4F18 " - "(FORMAT.md highlight). Use this for stamps, watermarks, and annotations " - "to match the document style. " + "When editing a PDF created by create_pdf, match the accent colour to " + "FORMAT.md's highlight value (default #FF4F18) to align with the document style. " "Use absolute paths only." ), mode="CLI", From baac38a5b1922c677bc945cd55bbc66f3e74a715 Mon Sep 17 00:00:00 2001 From: AlanAAG Date: Thu, 4 Jun 2026 11:17:04 -0600 Subject: [PATCH 3/3] fix: derive subtitle colour from FORMAT.md theme in create_pdf --- app/data/action/create_pdf.py | 2 +- app/utils/pdf_format.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/data/action/create_pdf.py b/app/data/action/create_pdf.py index b32c8d12..04eba416 100644 --- a/app/data/action/create_pdf.py +++ b/app/data/action/create_pdf.py @@ -276,7 +276,7 @@ def _sanitize(text): if subtitle: pdf.set_font("Helvetica", "I", 9) - pdf.set_text_color(200, 210, 240) + pdf.set_text_color(*t["subtitle"]) pdf.set_xy(lm + 8, y0 + HH - 14) pdf.cell(pw - 16, 8, _sanitize(subtitle)[:100], align="L") diff --git a/app/utils/pdf_format.py b/app/utils/pdf_format.py index a058d350..bf9efd42 100644 --- a/app/utils/pdf_format.py +++ b/app/utils/pdf_format.py @@ -184,4 +184,5 @@ def build_theme(s: dict) -> dict: "cc": s["base"], "rule": s["highlight"], # orange accent rule below header banner "htxt": s["white"], + "subtitle": s["light_grey"], }