diff --git a/.codex/skills/tangle-blog-editor/references/rewrite-rubric.md b/.codex/skills/tangle-blog-editor/references/rewrite-rubric.md index ef10aa3..cb7502b 100644 --- a/.codex/skills/tangle-blog-editor/references/rewrite-rubric.md +++ b/.codex/skills/tangle-blog-editor/references/rewrite-rubric.md @@ -11,6 +11,7 @@ Score each dimension 1-5. | Style | Research-note template | Builder voice with varied structure | | Conversion | No next action | Specific install, curl, manifest, CTA, or decision | | Evidence | Unsupported claims | Primary sources, internal links, proof blocks | +| Presentation | Markdown scaffolding only | Artifact, diagram, table, screenshot, trace, or designed cover supports the argument | ## Rewrite Pass Order @@ -21,6 +22,7 @@ Score each dimension 1-5. 5. Retrieval: add answer capsule, comparison table, and FAQ where useful. 6. Tangle: move specific product mechanism earlier. 7. Ending: replace summary with decision or readiness test. +8. Presentation: add or preserve a real artifact when the post is long, code-heavy, or abstract. ## Existing Series Diagnosis diff --git a/.codex/skills/tangle-blog-editor/references/voice-and-style.md b/.codex/skills/tangle-blog-editor/references/voice-and-style.md index 888c0c0..01cf7a7 100644 --- a/.codex/skills/tangle-blog-editor/references/voice-and-style.md +++ b/.codex/skills/tangle-blog-editor/references/voice-and-style.md @@ -4,6 +4,10 @@ Tangle blog posts should read like a senior builder explaining infrastructure they have used, shipped, or verified. +Blog is a broad surface. +It can carry technical explainers, product essays, comparisons, launch notes, field reports, research threads, and posts about anything Tangle has done. +Do not force every post into "technical deep dive" voice. + The target blend: ```text @@ -23,6 +27,8 @@ concrete builder pressure - Do not use "The Object Being Optimized", "Evaluation Protocol", "Working Rule", or "Source Trail" as repeated default headings across a series. 
- Do not overuse one-sentence paragraphs. - Do not turn every idea into a table or fenced text block. +- Do not use raw inventory counts as proof of seriousness. A blog index should show reader paths, not "80 posts published". +- Do not use markdown scaffolding as the whole presentation layer. Long posts need at least one artifact: diagram, table, screenshot, trace, code sample, terminal proof, or designed cover. - Do not write generic "field overview" prose before saying why Tangle, a builder, or an operator cares. - Do not close with a restated thesis. - Avoid AI cadence: "It is not X. It is Y.", "The useful question is...", "The serious version is...", "That is why...", "Here's the thing", "Turns out". diff --git a/.codex/skills/tangle-blog-proof/SKILL.md b/.codex/skills/tangle-blog-proof/SKILL.md index 8c5e975..aa02944 100644 --- a/.codex/skills/tangle-blog-proof/SKILL.md +++ b/.codex/skills/tangle-blog-proof/SKILL.md @@ -19,6 +19,12 @@ When checking an edited/new post, run: node .codex/skills/tangle-blog-proof/scripts/check-post.mjs src/content/blog/.mdx ``` +When checking the full blog surface, run: + +```bash +pnpm check:blog +``` + Then run the repo's normal validation when the change is substantive: ```bash @@ -36,6 +42,9 @@ The script catches structure, not judgment. Also check: - Competitor comparisons are fair and based on current primary sources. - The CTA is specific enough for a builder or agent to act on. - The post does not end with a summary that merely restates the thesis. +- Blog and research pages do not market raw post counts or other inventory totals. +- Blog index navigation works by series, topic, date, or argument. +- Long posts are not only markdown scaffolding; they carry at least one artifact such as a cover, diagram, table, screenshot, trace, code block, or terminal proof. 
## Output diff --git a/.codex/skills/tangle-blog-proof/references/proof-checklist.md b/.codex/skills/tangle-blog-proof/references/proof-checklist.md index bcc8ff0..e78b8c5 100644 --- a/.codex/skills/tangle-blog-proof/references/proof-checklist.md +++ b/.codex/skills/tangle-blog-proof/references/proof-checklist.md @@ -12,6 +12,8 @@ - Direct CTA exists. - Referenced images exist. - No obvious AI-cadence banned phrases. +- Blog and research pages do not show vanity inventory counts such as "80 posts published". +- Blog index gives reader paths through series, topics, dates, or arguments. ## Judgment Gates @@ -20,6 +22,8 @@ - Limitations are explicit. - Competitor comparisons are fair and current. - The ending gives a decision, not a summary. +- Research pages include only work with a claim, model, method, trace/evidence path, or falsifiable argument. +- Blog posts are allowed to be essays, launch notes, field reports, comparisons, or implementation notes; they should not all be forced into one technical deep-dive template. ## Common Fixes @@ -29,3 +33,5 @@ - Move Tangle-specific proof above generic background. - Add FAQ questions that match search intent. - Replace "What This Gets You" with a concrete CTA. +- Replace raw archive totals with series cards, topic filters, or a date-sorted archive. +- Replace markdown-only stretches with a diagram, table, screenshot, trace, terminal output, or designed cover. 
diff --git a/package.json b/package.json index 24d597a..ddd68c8 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "generate-types": "wrangler types", "status:collect": "node scripts/collect-status.mjs", "check:links": "node scripts/check-links.mjs", + "check:blog": "node scripts/audit-blog.mjs", "check:copy": "node scripts/check-copy.mjs", "check:models": "node scripts/check-models.mjs", "audit:visual": "node scripts/visual-audit.mjs" diff --git a/scripts/audit-blog.mjs b/scripts/audit-blog.mjs new file mode 100644 index 0000000..4890d78 --- /dev/null +++ b/scripts/audit-blog.mjs @@ -0,0 +1,211 @@ +#!/usr/bin/env node +import fs from 'node:fs' +import path from 'node:path' + +const root = process.cwd() +const blogDir = path.join(root, 'src/content/blog') +const pagesDir = path.join(root, 'src/pages') +const json = process.argv.includes('--json') + +const bannedPhrases = [ + 'delve', + 'comprehensive', + 'facilitate', + 'utilizing', + 'moreover', + 'furthermore', + 'landscape', + 'crucial', + 'paradigm', + "let's dive in", + "here's the thing", + 'turns out', + 'not just', + 'more than just', + 'at its core', +] + +const repeatedScaffoldHeadings = [ + 'The Object Being Optimized', + 'Evaluation Protocol', + 'Working Rule', + 'Source Trail', +] + +function listFiles(dir, predicate) { + const files = [] + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name) + if (entry.isDirectory()) files.push(...listFiles(full, predicate)) + else if (predicate(full)) files.push(full) + } + return files +} + +function parseFrontmatter(text) { + const match = text.match(/^---\n([\s\S]*?)\n---\n/) + if (!match) return { frontmatter: '', body: text, data: {} } + + const frontmatter = match[1] + const data = {} + let currentArray = null + + for (const line of frontmatter.split('\n')) { + const arrayItem = line.match(/^\s*-\s+(.+)$/) + if (arrayItem && currentArray) { + data[currentArray].push(cleanValue(arrayItem[1])) 
+ continue + } + + const pair = line.match(/^([A-Za-z0-9_]+):\s*(.*)$/) + if (!pair) continue + + const [, key, rawValue] = pair + currentArray = null + + if (!rawValue.trim()) { + data[key] = [] + currentArray = key + } else if (/^\d+$/.test(rawValue.trim())) { + data[key] = Number(rawValue.trim()) + } else if (rawValue.trim() === 'true' || rawValue.trim() === 'false') { + data[key] = rawValue.trim() === 'true' + } else if (/^\[.*]$/.test(rawValue.trim())) { + data[key] = rawValue + .trim() + .slice(1, -1) + .split(',') + .map((item) => cleanValue(item)) + .filter(Boolean) + } else { + data[key] = cleanValue(rawValue) + } + } + + return { frontmatter, body: text.slice(match[0].length), data } +} + +function cleanValue(value) { + return value.trim().replace(/^['"]|['"]$/g, '') +} + +function relativeFile(file) { + return path.relative(root, file) +} + +function lineOf(text, needle) { + const index = text.indexOf(needle) + if (index < 0) return 1 + return text.slice(0, index).split('\n').length +} + +function imageExists(image) { + if (!image?.startsWith('/images/')) return true + return fs.existsSync(path.join(root, 'public', image.slice(1))) +} + +function addFinding(findings, severity, file, message, needle, text) { + findings.push({ + severity, + file: relativeFile(file), + line: needle && text ? 
lineOf(text, needle) : 1, + message, + }) +} + +const postFiles = listFiles(blogDir, (file) => file.endsWith('.mdx')).sort() +const findings = [] +const series = new Map() +const tagCounts = new Map() + +for (const file of postFiles) { + const text = fs.readFileSync(file, 'utf8') + const { body, data } = parseFrontmatter(text) + const required = ['title', 'slug', 'summary', 'date', 'author', 'tags'] + + for (const field of required) { + if (data[field] === undefined || data[field] === '') { + addFinding(findings, 'error', file, `Missing frontmatter field: ${field}`) + } + } + + if (data.slug && `${data.slug}.mdx` !== path.basename(file)) { + addFinding(findings, 'warning', file, `Slug does not match filename: ${data.slug}`) + } + + for (const imageField of ['coverImage', 'heroImage']) { + if (data[imageField] && !imageExists(data[imageField])) { + addFinding(findings, 'error', file, `${imageField} does not exist: ${data[imageField]}`) + } + } + + const postSeries = data.series || 'Standalone' + series.set(postSeries, (series.get(postSeries) || 0) + 1) + for (const tag of data.tags || []) tagCounts.set(tag, (tagCounts.get(tag) || 0) + 1) + + const hasBodyImage = /!\[[^\]]*]\(\/images\//.test(body) || /= 5 && !hasBodyImage && !hasTable) { + addFinding(findings, 'warning', file, 'Code-block heavy post without a visual or table artifact') + } + + for (const heading of repeatedScaffoldHeadings) { + if (body.includes(`## ${heading}`)) { + addFinding(findings, 'warning', file, `Repeated scaffold heading: ${heading}`, heading, text) + } + } + + for (const phrase of bannedPhrases) { + const pattern = new RegExp(`\\b${phrase.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'i') + const match = body.match(pattern) + if (match) { + addFinding(findings, 'warning', file, `Weak or AI-cadence phrase: "${phrase}"`, match[0], text) + } + } +} + +for (const file of listFiles(pagesDir, (item) => item.endsWith('.astro'))) { + const text = fs.readFileSync(file, 'utf8') + const countPatterns 
= [ + /posts\.length\s*\}\s*published/, + /\b\d+\s+blog posts\b/i, + /\b\d+\s+posts\b/i, + /\bpublished\s+posts\b/i, + ] + for (const pattern of countPatterns) { + const match = text.match(pattern) + if (match) { + addFinding(findings, 'error', file, 'Do not market raw blog volume; organize by reader path instead', match[0], text) + } + } +} + +const summary = { + posts: postFiles.length, + series: Object.fromEntries([...series.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))), + tags: Object.fromEntries([...tagCounts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))), + errors: findings.filter((finding) => finding.severity === 'error').length, + warnings: findings.filter((finding) => finding.severity === 'warning').length, + findings, +} + +if (json) { + console.log(JSON.stringify(summary, null, 2)) +} else { + console.log(`Blog audit: ${summary.posts} posts, ${Object.keys(summary.series).length} series, ${summary.errors} errors, ${summary.warnings} warnings`) + console.log('') + for (const finding of findings) { + console.log(`${finding.severity.toUpperCase()} ${finding.file}:${finding.line} - ${finding.message}`) + } + if (findings.length) console.log('') + console.log(summary.errors ? 'NEEDS WORK' : 'PASS') +} + +process.exit(summary.errors > 0 ? 1 : 0) diff --git a/src/content/blog/ai-agent-sandbox.mdx b/src/content/blog/ai-agent-sandbox.mdx index 4aea4ac..e667257 100644 --- a/src/content/blog/ai-agent-sandbox.mdx +++ b/src/content/blog/ai-agent-sandbox.mdx @@ -16,7 +16,7 @@ heroImage: /images/covers/agent-intent-infrastructure.svg imageAlt: 'Agent runtime diagram showing sandbox files, processes, network policy, snapshots, and evidence' --- -An AI agent sandbox is an isolated runtime where an agent can create files, run processes, call tools, use the network under policy, preserve state, and return evidence. It is not just a code interpreter, browser automation session, or serverless job runner. 
Tangle Sandbox gives agents a machine-shaped workspace for real work: install dependencies, execute tests, inspect artifacts, recover from failure, and keep the dangerous parts contained. Start with [Tangle Sandbox](https://github.com/tangle-network/tcloud) when the agent needs an environment, not only an API. +An AI agent sandbox is an isolated runtime where an agent can create files, run processes, call tools, use the network under policy, preserve state, and return evidence. A code interpreter, browser automation session, or serverless job runner is too narrow for that job. Tangle Sandbox gives agents a machine-shaped workspace for real work: install dependencies, execute tests, inspect artifacts, recover from failure, and keep the dangerous parts contained. Start with [Tangle Sandbox](https://github.com/tangle-network/tcloud) when the agent needs an environment, not only an API. The hard part of agent infrastructure is not letting a model produce text. It is giving that model a place to act without handing it your laptop, production credentials, or a shared build server. diff --git a/src/content/blog/blueprint-tee-x402-production-gating.mdx b/src/content/blog/blueprint-tee-x402-production-gating.mdx index a2f310b..ee0a36d 100644 --- a/src/content/blog/blueprint-tee-x402-production-gating.mdx +++ b/src/content/blog/blueprint-tee-x402-production-gating.mdx @@ -155,7 +155,7 @@ pub enum SecretInjectionPolicy { } ``` -The builder enforces this at construction time: any `TeeConfig` with a non-Disabled mode automatically gets `SealedOnly`. Configs deserialized from JSON or TOML go through `validate()` which applies the same check. The reason is not just security hygiene. Env-var injection via container recreation invalidates attestation, breaks sealed secrets, and loses the on-chain deployment ID. A deployed TEE service whose secrets can be changed via environment variable gives up the entire attestation chain. 
+The builder enforces this at construction time: any `TeeConfig` with a non-Disabled mode automatically gets `SealedOnly`. Configs deserialized from JSON or TOML go through `validate()` which applies the same check. This protects the attestation chain itself. Env-var injection via container recreation invalidates attestation, breaks sealed secrets, and loses the on-chain deployment ID. A deployed TEE service whose secrets can be changed via environment variable gives up the entire attestation chain. ### 6. The blueprint must expose a container source diff --git a/src/content/blog/building-ai-services-on-tangle.mdx b/src/content/blog/building-ai-services-on-tangle.mdx index f2a0233..5032861 100644 --- a/src/content/blog/building-ai-services-on-tangle.mdx +++ b/src/content/blog/building-ai-services-on-tangle.mdx @@ -542,7 +542,7 @@ Full accountability chain. ## What's Next -The final post in this series covers the road ahead: what we're building next, where Tangle fits in the broader landscape, and how to get involved. +The final post in this series covers the road ahead: what we're building next, where Tangle fits in the broader market, and how to get involved. ## FAQ diff --git a/src/content/blog/crypto-tax-software-2026-defi-staking-wallets.mdx b/src/content/blog/crypto-tax-software-2026-defi-staking-wallets.mdx index a371214..aeb418f 100644 --- a/src/content/blog/crypto-tax-software-2026-defi-staking-wallets.mdx +++ b/src/content/blog/crypto-tax-software-2026-defi-staking-wallets.mdx @@ -66,7 +66,7 @@ For broader complex returns, crypto should connect to income, business activity, ## Where Tangle Fits -Tangle Tax Agent should produce a review packet, not just a gain/loss number. It should organize wallet files, classify events, flag incomplete history, and prepare draft filing artifacts for review-before-submit. The relevant control model is covered in [Automated Tax Filing With Review Control](/blog/automated-tax-filing-review-before-submit). 
+Tangle Tax Agent should produce a review packet alongside the gain/loss number. It should organize wallet files, classify events, flag incomplete history, and prepare draft filing artifacts for review-before-submit. The relevant control model is covered in [Automated Tax Filing With Review Control](/blog/automated-tax-filing-review-before-submit). ## What This Does Not Prove diff --git a/src/content/blog/distributed-training-demo.mdx b/src/content/blog/distributed-training-demo.mdx index da17207..a20196e 100644 --- a/src/content/blog/distributed-training-demo.mdx +++ b/src/content/blog/distributed-training-demo.mdx @@ -15,7 +15,7 @@ tags: Distributed training over the open internet usually dies at the sync step. The model can fit on GPUs, the data can be sharded, and the operators can be paid, but the network still has to move training state between machines that do not sit inside one data center. That is the pressure behind DeMo, Decoupled Momentum Optimization: reduce the communication payload enough that permissionless operators can coordinate without pretending they are one tightly coupled cluster. -Tangle's [Training Blueprint](https://github.com/tangle-network/training-blueprint) is the protocol version of that idea. It is not just a research note. It is a service surface where operators join training jobs, synchronize state, submit checkpoints, and get paid or penalized under network rules. +Tangle's [Training Blueprint](https://github.com/tangle-network/training-blueprint) is the protocol version of that idea. It turns the research direction into a service surface where operators join training jobs, synchronize state, submit checkpoints, and get paid or penalized under network rules. ## The Claim @@ -117,7 +117,7 @@ That is where Tangle's service graph starts to matter. A model trained through o This post does not prove every permissionless training job will converge. It does not prove 10,000x lower communication always means lower total cost. 
It does not prove Tangle has solved dataset quality, adversarial workers, or evaluation capture for every training workload. -It proves a narrower direction: if compressed synchronization makes open-network training viable, the remaining missing piece is not just optimizer code. It is an operator-run service layer with payment, evidence, health checks, and verification. +It proves a narrower direction: if compressed synchronization makes open-network training viable, optimizer code is only one piece. The missing layer is an operator-run service with payment, evidence, health checks, and verification. ## Start diff --git a/src/content/blog/how-tangle-verifies-work.mdx b/src/content/blog/how-tangle-verifies-work.mdx index d8be827..b453654 100644 --- a/src/content/blog/how-tangle-verifies-work.mdx +++ b/src/content/blog/how-tangle-verifies-work.mdx @@ -25,7 +25,7 @@ The hardest question in decentralized infrastructure isn't "how do we run comput This post covers what each verification mechanism actually proves, where it breaks down, and how Tangle lets developers wire it all together. -## Why Not Just Use AWS? +## Where Cloud Providers Stop AWS, Google Cloud, and Azure have decades of production hardening, legal accountability, and compliance certifications. For most applications, they're the right choice. diff --git a/src/content/blog/on-chain-rfq-job-quotes-verification-slashing.mdx b/src/content/blog/on-chain-rfq-job-quotes-verification-slashing.mdx index c09607e..7144c70 100644 --- a/src/content/blog/on-chain-rfq-job-quotes-verification-slashing.mdx +++ b/src/content/blog/on-chain-rfq-job-quotes-verification-slashing.mdx @@ -192,7 +192,7 @@ With real numbers: suppose an operator could save $5,000 by running a cheaper mo ### How This Compares to Other Accountability Models -Stake-based slashing isn't the only approach to operator accountability, and each alternative makes different tradeoffs. 
Reputation systems (used by most centralized cloud marketplaces) are cheap and simple, but a malicious operator can build reputation with honest behavior and exploit it later, and reputation scores aren't enforceable on-chain. Escrow models hold payment until delivery confirmation, but they require a trusted arbiter to resolve disputes and don't impose penalties beyond withholding payment, so the operator's downside is capped at the job revenue. Optimistic fraud proofs (used by Optimism and Arbitrum for rollup verification) assume results are correct unless challenged within a window, which is gas-efficient, but detection depends entirely on external watchers submitting challenges in time. Stake-based slashing is more capital-intensive for operators, but it provides the strongest deterrent: the penalty for cheating scales with the operator's total stake, not just the value of one job. +Stake-based slashing isn't the only approach to operator accountability, and each alternative makes different tradeoffs. Reputation systems (used by most centralized cloud marketplaces) are cheap and simple, but a malicious operator can build reputation with honest behavior and exploit it later, and reputation scores aren't enforceable on-chain. Escrow models hold payment until delivery confirmation, but they require a trusted arbiter to resolve disputes and don't impose penalties beyond withholding payment, so the operator's downside is capped at the job revenue. Optimistic fraud proofs (used by Optimism and Arbitrum for rollup verification) assume results are correct unless challenged within a window, which is gas-efficient, but detection depends entirely on external watchers submitting challenges in time. Stake-based slashing is more capital-intensive for operators, but it provides the strongest deterrent: the penalty for cheating scales with the operator's total stake rather than the value of one job. 
The combination of technical verification and economic enforcement is more robust than either alone. The known gaps: irrational actors might cheat anyway (which is why verification mechanisms exist as the first line of defense), and subtle quality degradation (serving a slightly worse model, returning slightly stale results) might lower `P(detection)` enough to shift the equation. diff --git a/src/content/blog/payment-native-infrastructure-ai-agent-product-strategy.mdx b/src/content/blog/payment-native-infrastructure-ai-agent-product-strategy.mdx index 5ae5b51..4da2c97 100644 --- a/src/content/blog/payment-native-infrastructure-ai-agent-product-strategy.mdx +++ b/src/content/blog/payment-native-infrastructure-ai-agent-product-strategy.mdx @@ -45,7 +45,7 @@ If the product needs a human dashboard and long-term seat management, normal Saa ## Why Billing Location Matters -Billing is not just a pricing page. It encodes who the system thinks the customer is. +Billing is a product boundary. It encodes who the system thinks the customer is. Traditional SaaS assumes: @@ -111,7 +111,7 @@ This pairs naturally with [AI code audit with sandboxed agents](/blog/ai-code-au ### Browser Evidence Runner -Input: URL, goal, auth state, browser constraints. Output: screenshots, DOM snippets, action log, stop reason. Payment: per scenario. Verification: evidence bundle, not just a pass/fail string. +Input: URL, goal, auth state, browser constraints. Output: screenshots, DOM snippets, action log, stop reason. Payment: per scenario. Verification: evidence bundle rather than a pass/fail string. This connects to [browser automation for AI agents](/blog/browser-automation-for-ai-agents) and [Tangle Browser Agent vs Browserbase and Browser Use](/blog/tangle-browser-agent-vs-browserbase-browser-use). 
diff --git a/src/content/blog/self-improving-stack-agent-runtime-topology.mdx b/src/content/blog/self-improving-stack-agent-runtime-topology.mdx index 5c5afff..4b9181b 100644 --- a/src/content/blog/self-improving-stack-agent-runtime-topology.mdx +++ b/src/content/blog/self-improving-stack-agent-runtime-topology.mdx @@ -306,7 +306,7 @@ The trace must show: Without that trace, you cannot tell whether the topology helped, whether one branch got lucky, or whether the selector quietly ignored the evidence. -## Evaluation Protocol +## Runtime Topology Test A serious runtime topology eval should treat topology changes as architecture changes. @@ -390,7 +390,7 @@ The operational test is simple: if the desired improvement would change the syst Topology is where agent systems stop being advice and become execution. -## Source Trail +## Sources For Runtime Topology Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-evaluation-gates.mdx b/src/content/blog/self-improving-stack-evaluation-gates.mdx index e9585b8..96e1917 100644 --- a/src/content/blog/self-improving-stack-evaluation-gates.mdx +++ b/src/content/blog/self-improving-stack-evaluation-gates.mdx @@ -119,7 +119,7 @@ n_pairs >= n_min LCB_95(median(delta)) > epsilon ``` -`LCB_95` is the lower confidence bound. If the lower bound clears the threshold, the gate has evidence that the lift is not just random luck. +`LCB_95` is the lower confidence bound. If the lower bound clears the threshold, the gate has evidence that the lift holds up under sampling noise rather than coming from random luck. This is where bootstrap confidence intervals are useful. You resample paired deltas, compute the median for each resample, and inspect the lower quantile: @@ -545,7 +545,7 @@ The final output is scored, but the branch, tool, verifier, and selector evidenc All failures collapse into one scalar, so the next optimizer has no diagnostic direction. -## Working Rule +## Promotion Rule The optimizer proposes. 
@@ -571,7 +571,7 @@ If the answer is missing, the candidate stays a candidate. The gate is where self-improvement stops being a story about better prompts and becomes a release discipline. -## Source Trail +## Sources For Evaluation Gates Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-governance.mdx b/src/content/blog/self-improving-stack-governance.mdx index 03b2b38..a56a5ab 100644 --- a/src/content/blog/self-improving-stack-governance.mdx +++ b/src/content/blog/self-improving-stack-governance.mdx @@ -16,7 +16,7 @@ A self-improving agent is an optimizer pointed at its own behavior. That sounds abstract until the system has tools, memory, credentials, subagents, evals, worktrees, and promotion gates. -Then the optimizer is not just changing text. +Then the optimizer is changing system behavior, not text. It can change what future agents see, what they believe, which branches run, which outputs are selected, which benchmarks matter, and which candidate becomes production. @@ -617,7 +617,7 @@ That is not bureaucracy. That is how a learning system remains a system. -## Source Trail +## Sources For Governance Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-harness-evolution.mdx b/src/content/blog/self-improving-stack-harness-evolution.mdx index 17ab35d..9f84cf4 100644 --- a/src/content/blog/self-improving-stack-harness-evolution.mdx +++ b/src/content/blog/self-improving-stack-harness-evolution.mdx @@ -100,7 +100,7 @@ C = cost vector lambda = cost weights ``` -The promotion rule is not just the objective. It is a gate: +The promotion rule is a gate on the objective: ```text promote(h) iff @@ -580,7 +580,7 @@ When the current surface cannot express the next improvement, the correct move i That is when the harness has to evolve. -## Source Trail +## Sources For Harness Evolution Source freshness checked on 2026-06-06. 
diff --git a/src/content/blog/self-improving-stack-memory-flywheels.mdx b/src/content/blog/self-improving-stack-memory-flywheels.mdx index 055f600..370cc8c 100644 --- a/src/content/blog/self-improving-stack-memory-flywheels.mdx +++ b/src/content/blog/self-improving-stack-memory-flywheels.mdx @@ -502,7 +502,7 @@ link contradiction evidence trigger held-out replay ``` -Bad memory cannot just be deleted quietly. The system needs to learn why it was bad. +Bad memory cannot be deleted quietly. The system needs to learn why it was bad. ## How Tangle Fits @@ -620,7 +620,7 @@ A memory system is doing real self-improvement when all of these are true: 3. The write is source-grounded or explicitly scoped to its evidence. 4. The gate admits, rejects, asks, quarantines, or expires it. 5. Future retrieval selects it only for appropriate roles and tasks. -6. A paired eval shows task lift, not just retrieval activity. +6. A paired eval shows task lift rather than retrieval activity. 7. Staleness, contradiction, privacy, and poisoning have review paths. ``` @@ -628,7 +628,7 @@ If any part is missing, the system may still be useful, but it is not a discipli It may just be a larger prompt with a longer memory leak. -## Source Trail +## Sources For Memory Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-multi-agent-coordination.mdx b/src/content/blog/self-improving-stack-multi-agent-coordination.mdx index 7b9aa0e..6a4dda4 100644 --- a/src/content/blog/self-improving-stack-multi-agent-coordination.mdx +++ b/src/content/blog/self-improving-stack-multi-agent-coordination.mdx @@ -22,7 +22,7 @@ Multi-agent coordination becomes real when roles have separate contracts, state The persona is content. Coordination is structure. 
-## The Object Being Optimized +## The Coordination Surface The previous post made runtime topology explicit: @@ -353,7 +353,7 @@ selection: decision rationale ``` -In the local `@tangle-network/agent-eval@0.34.1` source, the package is not just a judge wrapper. It is a promotion and analysis system: +In the local `@tangle-network/agent-eval@0.34.1` source, the package is a promotion and analysis system, not a judge wrapper: - `AgentProfileCell`, `AGENT_PROFILE_KINDS`, `buildSandboxAgentProfileCell`, and `toAgentProfileJson`: stable cells for model, prompt, tool, skill, runtime, and harness variation. - `runEvalCampaign`: variant by scenario campaign runner with raw-provider capture and profile-cell checks. @@ -413,7 +413,7 @@ The final answer looks good, but the system cannot show which child saw which co These are not edge cases. They are the default unless the coordination structure prevents them. -## Evaluation Protocol +## Coordination Test Do not ask whether a multi-agent system "feels smarter." Ask whether it beats the right baseline. @@ -502,7 +502,7 @@ This is where GEPA, MIPRO, SkillOpt, agent-runtime, agent-eval, and meta-harness The mistake is asking one optimizer to search a surface it cannot execute. -## Working Rule +## Coordination Rule Use multiple agents when the work needs at least one of these: @@ -531,7 +531,7 @@ If not, the coordination is not yet a system property. It is prose. Personas can help agents think in different local modes. Coordination decides whether those modes become useful work. -## Source Trail +## Sources For Coordination Source freshness checked on 2026-06-06. 
diff --git a/src/content/blog/self-improving-stack-optimization-theory.mdx b/src/content/blog/self-improving-stack-optimization-theory.mdx index 7e0875f..19f94fc 100644 --- a/src/content/blog/self-improving-stack-optimization-theory.mdx +++ b/src/content/blog/self-improving-stack-optimization-theory.mdx @@ -121,7 +121,7 @@ where `J` is expensive, noisy, partially subjective, and easy to game. Trace capture becomes central here. A score without a trace is almost useless for improvement. If a coding agent fails, the next action depends on whether it misunderstood the task, chose the wrong file, ran the wrong test, ignored a failure, used a stale dependency, exceeded the turn budget, or got blocked by a missing credential. Those are different failure modes. A single scalar score collapses them. -This is the core insight behind reflective optimizers. Language is not just the object being optimized. Language is also a diagnostic channel. A trace can be summarized, critiqued, and converted into a candidate change. +This is the core insight behind reflective optimizers. Language is both the object being optimized and a diagnostic channel. A trace can be summarized, critiqued, and converted into a candidate change. ## The Current Map @@ -316,7 +316,7 @@ The evaluation layer is not secondary. It is the thing that makes optimization l ## What Is Actually New Right Now -The current wave is not just "better prompts." It is the movement of optimization outward from model weights into the artifacts around the model. +The current wave moves optimization outward from model weights into the artifacts around the model. DSPy and MIPRO made LM programs optimizable. GEPA showed that reflective text evolution can be highly sample efficient for compound systems. Ax is packaging these ideas for production TypeScript agents and flows. SkillOpt treats skills as durable trainable state, with discipline borrowed from model optimization. AlphaEvolve makes executable code the candidate surface. 
Microsoft Frontier Tuning points toward enterprise reinforcement learning environments where workflows, tools, models, skills, and harnesses co-evolve inside a compliance boundary. @@ -334,7 +334,7 @@ If you score the wrong thing, you optimize the wrong thing. For agent builders, that is the whole game. -## Source Trail +## Sources For Optimization Theory Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-post-training.mdx b/src/content/blog/self-improving-stack-post-training.mdx index 2d74838..e620f3b 100644 --- a/src/content/blog/self-improving-stack-post-training.mdx +++ b/src/content/blog/self-improving-stack-post-training.mdx @@ -253,7 +253,7 @@ That can reduce cost, latency, or deployment size. It can also import the teache Distillation is not the same as a self-improvement loop. It is compression or transfer unless the student is evaluated, used to generate new evidence, and improved under a gate. -Microsoft's MAI announcement is notable here because it says MAI-Thinking-1 was trained from the ground up on clean data without distillation from third-party models. That is a data-lineage and independence claim, not just a model-performance claim. +Microsoft's MAI announcement is notable here because it says MAI-Thinking-1 was trained from the ground up on clean data without distillation from third-party models. That is a data-lineage and independence claim before it is a model-performance claim. ## Process Supervision @@ -347,7 +347,7 @@ tuned output models, skills, and harness It also says the RLE is used for both post-training and inference: during training it learns from workflows, tool usage, and eval signals; at inference it explores multiple frontier and fine-tuned models across turns to find stronger candidate paths before returning an answer. -That is not just fine-tuning a prompt. +That moves beyond fine-tuning a prompt. 
It is an environment-level adaptation loop: @@ -416,7 +416,7 @@ Not every successful trace should become a gradient update. ## The Data Boundary -Model training changes the risk surface because training data is not just context. It can become behavior. +Model training changes the risk surface because training data can become behavior. A training-ready record needs: @@ -446,7 +446,7 @@ do not backpropagate private data outside its boundary do not let synthetic data lose its label ``` -The post-training layer needs a data firewall, not just a dataset. +The post-training layer needs a data firewall before it needs a dataset. ## Where Tangle Fits @@ -567,7 +567,7 @@ Why does this behavior belong in weights instead of in the harness? That is the post-training question. -## Source Trail +## Sources For Post-Training Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-prompt-optimization.mdx b/src/content/blog/self-improving-stack-prompt-optimization.mdx index 8ba4e7f..5bf3a0d 100644 --- a/src/content/blog/self-improving-stack-prompt-optimization.mdx +++ b/src/content/blog/self-improving-stack-prompt-optimization.mdx @@ -30,7 +30,7 @@ Which factors are mutable, which factors are held fixed, and which evaluator is Answer that and the ecosystem becomes legible as a set of search problems over different coordinate systems. -## The Object Being Optimized +## The Prompt Search Surface Let: @@ -69,7 +69,7 @@ text surface -> planner hint -> runtime capability missing -> no action -> evalu The first path is a causal optimization claim. The second is a measurement artifact. -## Prompt Surface, Not Just Prompt String +## Prompt Surface Versus Prompt String The word "prompt" hides several artifacts that behave differently under optimization. @@ -368,7 +368,7 @@ They do not optimize the same surface. Prompt optimization is one coordinate in the larger search space. 
It is powerful because language is now both a control surface and a feedback channel. It is limited because real agents are not made of language alone. Strong systems use GEPA, MIPRO, DSPy, AxLLM, and TextGrad-style methods where text has leverage, then hand off to runtime, eval, skill, memory, code, or model optimization when traces prove the bottleneck lives somewhere else. -## Source Trail +## Sources For Prompt Optimization Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-skill-optimization.mdx b/src/content/blog/self-improving-stack-skill-optimization.mdx index b2a1c42..ed1d085 100644 --- a/src/content/blog/self-improving-stack-skill-optimization.mdx +++ b/src/content/blog/self-improving-stack-skill-optimization.mdx @@ -137,7 +137,7 @@ external procedure can be trained while the policy model stays fixed That is a different layer from prompt optimization. Prompt search asks what text should steer this program. Skill optimization asks what reusable procedure the agent should inherit next time. -## Why Skills Are Not Just Longer Prompts +## Why Skills Outlive Prompts A skill has lifecycle. @@ -252,7 +252,7 @@ Is the failure caused by missing procedure, missing activation, missing affordan Only the first two are skill optimization problems. -## Evaluation Protocol +## Skill Release Test A serious skill optimization protocol should treat a skill change like a behavior-bearing release. @@ -349,7 +349,7 @@ They share the same skeleton: propose, run, score, compare, update, promote. They do not train the same state. -## Source Trail +## Sources For Skill Optimization Source freshness checked on 2026-06-06. 
diff --git a/src/content/blog/self-improving-stack-test-time-compute.mdx b/src/content/blog/self-improving-stack-test-time-compute.mdx index c84a4fd..d50c558 100644 --- a/src/content/blog/self-improving-stack-test-time-compute.mdx +++ b/src/content/blog/self-improving-stack-test-time-compute.mdx @@ -403,7 +403,7 @@ runtime spends compute eval proves whether the spend was worth it ``` -## Evaluation Protocol +## Test-Time Compute Budget Test A serious test-time compute eval starts with a budget table. @@ -449,7 +449,7 @@ branch_failures trace_integrity ``` -Also report dominance, not just mean score: +Also report dominance rather than mean score alone: ```text strategy_a dominates strategy_b if: @@ -511,7 +511,7 @@ The token budget is matched, but one strategy uses much more sandbox time, brows The system reports that one candidate succeeded somewhere in the batch but cannot select it reliably. -## Working Rule +## Spend Rule Do not evaluate an agent topology against one sample. @@ -541,7 +541,7 @@ Given the same budget, this allocation policy produced better verified outcomes. That is the first bar for runtime topology, multi-agent coordination, and self-improving harnesses. -## Source Trail +## Sources For Test-Time Compute Source freshness checked on 2026-06-06. diff --git a/src/content/blog/self-improving-stack-trace-systems.mdx b/src/content/blog/self-improving-stack-trace-systems.mdx index 89d1dcc..fafdf67 100644 --- a/src/content/blog/self-improving-stack-trace-systems.mdx +++ b/src/content/blog/self-improving-stack-trace-systems.mdx @@ -489,7 +489,7 @@ access_token refresh_token ``` -But redaction is not just deletion. It records what was removed: +Redaction records what was removed: ```text redactedFields = [...] @@ -609,7 +609,7 @@ Sensitive fields are removed without recording what was removed, destroying the Run records, traces, scorecard cells, and analyst findings use different ids, so the evidence cannot be joined. 
-## Working Rule +## Trace Rule Do not optimize from final scores alone. @@ -637,7 +637,7 @@ If it cannot answer those questions, it is not training data for a self-improvin The trace is where agent behavior becomes learnable. -## Source Trail +## Sources For Trace Systems Source freshness checked on 2026-06-06. diff --git a/src/content/blog/tangle-sandbox-vs-daytona-modal.mdx b/src/content/blog/tangle-sandbox-vs-daytona-modal.mdx index 386e706..8798a18 100644 --- a/src/content/blog/tangle-sandbox-vs-daytona-modal.mdx +++ b/src/content/blog/tangle-sandbox-vs-daytona-modal.mdx @@ -133,4 +133,4 @@ It is an isolated environment where an agent can edit files, run tools, recover ### How should I compare Daytona, Modal, and Tangle? -Run the same real task in each: dependency install, file edits, intentional failure, recovery, logs, artifacts, and any browser evidence. Compare the final evidence, not just whether code ran. +Run the same real task in each: dependency install, file edits, intentional failure, recovery, logs, artifacts, and any browser evidence. Compare the final evidence rather than whether code ran. diff --git a/src/content/blog/tangle-sandbox-vs-e2b.mdx b/src/content/blog/tangle-sandbox-vs-e2b.mdx index e908961..a54fea7 100644 --- a/src/content/blog/tangle-sandbox-vs-e2b.mdx +++ b/src/content/blog/tangle-sandbox-vs-e2b.mdx @@ -60,7 +60,7 @@ The hard part of real agent work is not always the first command. It is the reco An agent installs the wrong dependency. A generated test fails. A browser step proves the UI is broken but the code command still exits zero. A credential has to be present for one tool call and absent from the artifact. The useful output is not stdout; it is the workspace state, the failed command, the corrected diff, the preview, and the trace that explains why a reviewer should trust the result. -That is the Tangle frame. Tangle Sandbox is not just a place to run code. 
It is one surface inside a larger agent runtime stack: +That is the Tangle frame. Tangle Sandbox is a surface inside a larger agent runtime stack: - [AI agent sandbox](/blog/ai-agent-sandbox) for workspace execution, package installs, previews, snapshots, and recovery. - [Browser automation for AI agents](/blog/browser-automation-for-ai-agents) when the artifact needs browser evidence, not only terminal output. @@ -141,7 +141,7 @@ Choose E2B when: Choose Tangle Sandbox when: -- The agent needs a persistent workspace, not just a command runner. +- The agent needs a persistent workspace, not only a command runner. - Recovery from failure is part of the product experience. - Browser evidence has to travel with code output. - Credentials need policy around the run and artifact boundary. diff --git a/src/content/blog/the-self-improving-stack.mdx b/src/content/blog/the-self-improving-stack.mdx index a0096b0..f037ced 100644 --- a/src/content/blog/the-self-improving-stack.mdx +++ b/src/content/blog/the-self-improving-stack.mdx @@ -471,7 +471,7 @@ If the answer is only "the model reflected," there probably is not. - [Memory Is Not Automatically Learning](/blog/self-improving-stack-memory-flywheels/) explains persistence, retrieval, source grounding, and poisoning. - [Self-Improvement Needs A Safety Case](/blog/self-improving-stack-governance/) closes with authority, governance, red teams, and release gates. -## Source Trail +## Sources For The Stack Source freshness checked on 2026-06-06. diff --git a/src/content/blog/why-ai-infrastructure-needs-decentralization.mdx b/src/content/blog/why-ai-infrastructure-needs-decentralization.mdx index c70645f..e13d5ac 100644 --- a/src/content/blog/why-ai-infrastructure-needs-decentralization.mdx +++ b/src/content/blog/why-ai-infrastructure-needs-decentralization.mdx @@ -131,7 +131,7 @@ These are real constraints. Building within them requires clear-eyed assessment Cloud providers give you audit logs they control. 
They have root access to your workloads. For AI services specifically, there's no way to verify which model was run, whether outputs were modified, or if prompts were logged. Cryptoeconomic infrastructure replaces reputation-based trust with cryptographic verification and economic penalties. **What is cryptoeconomic security?** -Operators stake assets that can be slashed for misbehavior. If the expected cost of cheating (probability of detection times slash amount) exceeds the expected benefit, rational operators don't cheat. This is combined with TEEs, MPC, and redundant execution for prevention, not just punishment. +Operators stake assets that can be slashed for misbehavior. If the expected cost of cheating (probability of detection times slash amount) exceeds the expected benefit, rational operators don't cheat. This is combined with TEEs, MPC, and redundant execution for prevention before punishment. **How is Tangle different from other decentralized compute platforms?** Tangle is a general-purpose coordination layer where developers choose and configure verification per blueprint. It doesn't prescribe a single verification mechanism. Developers compose TEEs, MPC, ZK proofs, and redundant execution based on their specific trust requirements. The protocol handles staking, slashing, and settlement. 
diff --git a/src/pages/blog/index.astro b/src/pages/blog/index.astro index ad33b8e..a502b92 100644 --- a/src/pages/blog/index.astro +++ b/src/pages/blog/index.astro @@ -1,68 +1,144 @@ --- -import BaseLayout from '../../layouts/BaseLayout.astro'; -import BlogCover from '../../components/BlogCover.astro'; -import { getCollection } from 'astro:content'; +import BaseLayout from '../../layouts/BaseLayout.astro' +import BlogCover from '../../components/BlogCover.astro' +import { getCollection } from 'astro:content' -const posts = await getCollection('blog', ({ data }) => !data.draft); +const posts = await getCollection('blog', ({ data }) => !data.draft) const sortedPosts = posts.sort( (a, b) => new Date(b.data.date).getTime() - new Date(a.data.date).getTime(), -); - -const featuredPost = sortedPosts[0]; -const remainingPosts = sortedPosts.slice(1); +) +const normalizeSeries = (series?: string) => series?.replace(/^['"]|['"]$/g, '') || 'Standalone' const formatDate = (date: string) => new Date(date).toLocaleDateString('en-US', { year: 'numeric', month: 'short', day: 'numeric', - }); + }) + +const topicLabels: Record = { + agents: 'Agents', + 'browser-agent': 'Browser', + x402: 'x402', + 'tax-agent': 'Tax agent', + 'blueprint-agent': 'Blueprint agent', + 'tangle-protocol': 'Protocol', + 'code-auditor': 'Code audit', +} + +const topicOptions = Object.keys(topicLabels).filter((topic) => + sortedPosts.some((post) => post.data.tags.includes(topic)), +) + +const seriesMap = new Map() +for (const post of sortedPosts) { + const series = normalizeSeries(post.data.series) + if (!seriesMap.has(series)) seriesMap.set(series, []) + seriesMap.get(series)?.push(post) +} + +const seriesGroups = [...seriesMap.entries()] + .map(([name, entries]) => { + const ordered = [...entries].sort((a, b) => { + const orderA = a.data.seriesOrder ?? 999 + const orderB = b.data.seriesOrder ?? 
999 + if (orderA !== orderB) return orderA - orderB + return new Date(b.data.date).getTime() - new Date(a.data.date).getTime() + }) + + const representative = ordered.find((post) => post.data.coverImage || post.data.heroImage) ?? ordered[0] + const latest = [...entries].sort( + (a, b) => new Date(b.data.date).getTime() - new Date(a.data.date).getTime(), + )[0] + + return { + name, + representative, + latest, + topics: [...new Set(entries.flatMap((post) => post.data.tags))], + } + }) + .sort((a, b) => { + if (a.name === 'Standalone') return 1 + if (b.name === 'Standalone') return -1 + return a.name.localeCompare(b.name) + }) ---
-

Technical deep dives.

-

Architecture notes, implementation guides, and production lessons from the Tangle stack.

+

Writing from Tangle.

+

Implementation notes, product essays, research threads, and field reports from agent infrastructure work.

- {featuredPost && ( - - -
- -

{featuredPost.data.title}

-

{featuredPost.data.summary}

-
-
- )} +
+
+

Series

+

Follow a topic through its strongest thread, then branch into the archive.

+
+ + +
-
-

All posts

- {posts.length} published +
+
+

Archive

+

Filter by the work you are trying to understand, not by site inventory.

+
+
+ + {topicOptions.map((topic) => ( + + ))} +
-
- {remainingPosts.map((post) => ( - + +
+ {sortedPosts.map((post) => ( +

{post.data.title}

@@ -74,6 +150,29 @@ const formatDate = (date: string) =>
+ +