dreadnode · GangGreenTemperTatum · Jun 12, 2026 · Jun 12, 2026
diff --git a/capabilities/web-security/tests/test_credence.py b/capabilities/web-security/tests/test_credence.py
@@ -263,6 +263,47 @@ async def test_trace_id_is_generated_for_each_assessment(
         assert extract_trace_id(first) != extract_trace_id(second)
 
 
+class TestCvssScore:  # exercises the optional cvss_score argument of assess_confidence
+    async def test_cvss_tag_in_output(self, toolset: CredenceTool) -> None:
+        result = await toolset.assess_confidence(
+            claim="IDOR on /api/users/{id}",
+            confidence="high",
+            evidence_basis="poc_confirmed",
+            cvss_score=7.5,  # under the 9.0 cut-off that warns at high confidence
+        )
+        assert "[cvss:7.5]" in result  # supplied score is echoed as a tag in the response prefix
+        assert "CONFIRMED" in result
+
+    async def test_no_cvss_tag_when_omitted(self, toolset: CredenceTool) -> None:
+        result = await toolset.assess_confidence(
+            claim="test", confidence="high", evidence_basis="poc_confirmed",
+        )
+        assert "[cvss:" not in result  # cvss_score defaults to None, so no tag is emitted
+
+    async def test_low_confidence_high_cvss_warns(self, toolset: CredenceTool) -> None:
+        result = await toolset.assess_confidence(
+            claim="maybe RCE", confidence="low", evidence_basis="assumed",
+            cvss_score=9.8,  # >= 7.0 combined with low confidence triggers the warning
+        )
+        assert "CVSS WARNING" in result
+        assert "inflated" in result  # wording comes from the "severity likely inflated" message
+
+    async def test_high_confidence_critical_cvss_warns(self, toolset: CredenceTool) -> None:
+        result = await toolset.assess_confidence(
+            claim="full RCE", confidence="high", evidence_basis="poc_confirmed",
+            cvss_score=9.8,  # >= 9.0 is flagged even when confidence is high
+        )
+        assert "CVSS WARNING" in result
+        assert "Critical" in result  # the high-confidence warning names the Critical band
+
+    async def test_matching_cvss_no_warning(self, toolset: CredenceTool) -> None:
+        result = await toolset.assess_confidence(
+            claim="info disclosure", confidence="high", evidence_basis="poc_confirmed",
+            cvss_score=4.3,  # below both warning thresholds (7.0 and 9.0)
+        )
+        assert "CVSS WARNING" not in result
+
+
 class TestHandleToolCall:
     async def test_via_handle_tool_call(self, toolset: CredenceTool) -> None:
         from dreadnode.agents.tools import FunctionCall, ToolCall

diff --git a/capabilities/web-security/tools/credence.py b/capabilities/web-security/tools/credence.py
@@ -30,6 +30,17 @@
 _MEDIUM_EVIDENCE = {"code_pattern_with_context", "behavior_observed"}
 
 
+def _cvss_mismatch(confidence: str, cvss: float | None) -> str | None:  # warning text, or None when nothing to flag
+    """Flag when CVSS severity band conflicts with confidence level."""
+    if cvss is None:  # no score supplied, so there is nothing to compare
+        return None
+    if confidence in ("low", "uncertain") and cvss >= 7.0:  # High/Critical score claimed on weak confidence
+        return f"CVSS {cvss} (High/Critical) with {confidence} confidence — severity likely inflated."
+    if confidence == "high" and cvss >= 9.0:  # Critical scores get a sanity prompt even when confident
+        return f"CVSS {cvss} (Critical) — verify this isn't inflated. Critical requires RCE, mass data breach, or full account takeover."
+    return None  # no rule matched; note that "medium" confidence is never flagged here
+
+
 class CredenceTool(Toolset):
     """Confidence calibration checkpoint for security claims."""
 
@@ -63,47 +74,57 @@ async def assess_confidence(
             "Your agent identifier (e.g. 'agent-opus', 'dn-agent-kimi', "
             "'agent-codex'). Used for log attribution across multi-agent sessions.",
         ] = "unknown",
+        cvss_score: Annotated[
+            float | None,
+            "Your estimated CVSS 3.1 base score (0.0-10.0) for this claim. "
+            "Forces severity reflection before reporting. Logged for calibration.",
+        ] = None,
     ) -> str:
         """Use BEFORE making any claim about a vulnerability, exploitability,
         tech stack, or security impact. Forces structured reflection on what
         you actually know vs. what you're inferring. Do NOT skip this for
         findings you plan to report or act on.
         """
         trace_id = str(uuid.uuid4())
-        prefix = f"[{agent_string}] [trace_id:{trace_id}] "
+        cvss = round(cvss_score, 1) if cvss_score is not None else None
+        cvss_tag = f" [cvss:{cvss}]" if cvss is not None else ""
+        prefix = f"[{agent_string}] [trace_id:{trace_id}]{cvss_tag} "
 
         if confidence == "high" and evidence_basis in _STRONG_EVIDENCE:
-            return (
+            result = (
                 f"{prefix}CONFIRMED — High confidence with strong evidence ({evidence_basis}). "
                 "Proceed with assertion. This is reportable if impact is demonstrated."
             )
-
-        if confidence == "high" and evidence_basis not in _STRONG_EVIDENCE:
-            return (
+        elif confidence == "high":
+            result = (
                 f"{prefix}OVERCONFIDENT — You claimed high confidence but your evidence "
                 f"basis is '{evidence_basis}'. Downgrade to a lead/gadget until you have: "
                 "traced data flow, confirmed with PoC, or verified server response. "
                 "Do NOT claim this is a vulnerability yet."
             )
-
-        if confidence == "medium":
-            if evidence_basis in _STRONG_EVIDENCE:
-                return (
-                    f"{prefix}UPGRADE AVAILABLE — You have strong evidence ({evidence_basis}) "
-                    "but only medium confidence. Re-evaluate: if the evidence directly "
-                    "confirms the claim, upgrade to high confidence and proceed."
-                )
-            return (
+        elif confidence == "medium" and evidence_basis in _STRONG_EVIDENCE:
+            result = (
+                f"{prefix}UPGRADE AVAILABLE — You have strong evidence ({evidence_basis}) "
+                "but only medium confidence. Re-evaluate: if the evidence directly "
+                "confirms the claim, upgrade to high confidence and proceed."
+            )
+        elif confidence == "medium":
+            result = (
                 f"{prefix}UNCONFIRMED LEAD — Medium confidence ({evidence_basis}). "
                 "State this as a potential finding requiring validation. "
                 "Specify exactly what evidence is missing before it becomes reportable. "
                 "Do NOT assign severity or write a report for this yet."
             )
+        else:
+            result = (
+                f"{prefix}INSUFFICIENT — Low confidence ({evidence_basis}). "
+                "Do NOT assert this as a finding. Log as a pattern/gadget for "
+                "future investigation only. Do not invest PoC time without "
+                "additional supporting evidence."
+            )
+
+        mismatch = _cvss_mismatch(confidence, cvss)
+        if mismatch:
+            result += f" CVSS WARNING: {mismatch}"
 
-        # low or uncertain
-        return (
-            f"{prefix}INSUFFICIENT — Low confidence ({evidence_basis}). "
-            "Do NOT assert this as a finding. Log as a pattern/gadget for "
-            "future investigation only. Do not invest PoC time without "
-            "additional supporting evidence."
-        )
+        return result