diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml
index 3c48e4b..24c7c81 100644
--- a/.github/workflows/security-scan.yml
+++ b/.github/workflows/security-scan.yml
@@ -1,8 +1,14 @@
-# Security scan skills using cisco-ai-skill-scanner.
+# Security scan skills using NVIDIA SkillSpector.
 #
 # Triggers on all pushes to main and PRs. The detect-changes job
 # auto-discovers capability directories (capabilities/<cap>/capability.yaml)
 # and filters to only those with changed files.
+#
+# Note: SkillSpector is installed from GitHub (not PyPI yet) and runs in
+# static-only mode (--no-llm) so the workflow does not require provider
+# API keys. Security-focused capabilities may score high by design; the
+# workflow uploads SARIF to GitHub Code Scanning and reports findings but
+# does not block merges while thresholds are being tuned.
 
 name: Security Scan
 
@@ -71,8 +77,9 @@ jobs:
                 fi
               done
             done
-            # If scan-policy.yaml changed, scan everything
-            if git diff --name-only "${DIFF_BASE}" "${DIFF_HEAD}" 2>/dev/null | grep -q '^scan-policy\.yaml$'; then
+            # If security-scan workflow/script changed, scan everything
+            if git diff --name-only "${DIFF_BASE}" "${DIFF_HEAD}" 2>/dev/null \
+              | grep -qE '^(\.github/workflows/security-scan\.yml|scripts/security-scan\.sh)$'; then
               CAPS=("${ALL_CAPS[@]}")
             fi
           fi
@@ -102,9 +109,13 @@ jobs:
           enable-cache: true
 
       - name: Scan changed capabilities
+        id: scan
+        # Continue on error: security capabilities often score high by design.
+        # SARIF is uploaded for review; merge gating can be enabled once
+        # thresholds are tuned.
+        continue-on-error: true
         run: |
           set -euo pipefail
-          failed=0
           mkdir -p .sarif
 
           CAPS_JSON='${{ needs.detect-changes.outputs.capabilities }}'
@@ -120,16 +131,23 @@ jobs:
               continue
             fi
 
-            echo "==> Scanning ${skills_dir}/ (${skill_count} skills)"
-            uvx --from cisco-ai-skill-scanner skill-scanner scan-all "${skills_dir}" \
-              --recursive \
-              --use-behavioral \
-              --policy scan-policy.yaml \
-              --format summary \
+            echo "==> Scanning capabilities/${cap}/ (${skill_count} skills)"
+
+            # Human-readable terminal output for logs
+            uvx --from git+https://github.com/NVIDIA/SkillSpector \
+              skillspector scan "capabilities/${cap}" \
+              --format terminal \
+              --no-llm \
+              || echo "    ⚠ ${cap} scan reported findings (see logs above)"
+
+            # SARIF output for GitHub Code Scanning
+            uvx --from git+https://github.com/NVIDIA/SkillSpector \
+              skillspector scan "capabilities/${cap}" \
               --format sarif \
-              --output-sarif ".sarif/${cap}.sarif" \
-              --fail-on-severity high \
-            || failed=1
+              --output ".sarif/${cap}.sarif" \
+              --no-llm \
+              || echo "    ⚠ ${cap} SARIF generation reported findings"
+
             echo ""
           done
 
@@ -138,14 +156,16 @@ jobs:
             ls -la .sarif/
           fi
 
-          if [[ "${failed}" -eq 1 ]]; then
-            echo "::error::Security scan found HIGH+ severity findings"
-            exit 1
-          fi
-
       - name: Upload SARIF results
         if: always()
         uses: github/codeql-action/upload-sarif@v3
         with:
           sarif_file: .sarif/
         continue-on-error: true
+
+      - name: Report scan outcome
+        if: always()
+        run: |
+          if [[ "${{ steps.scan.outcome }}" == "failure" ]]; then
+            echo "::warning::SkillSpector reported HIGH/CRITICAL risk findings. Review the uploaded SARIF before merging."
+          fi
diff --git a/README.md b/README.md
index ac1138b..22d1b68 100644
--- a/README.md
+++ b/README.md
@@ -48,14 +48,16 @@ Every directory under `capabilities/` is a shipped, working example. Read one al
 
 ## Security scanning
 
-Every skill in this repo is scanned with [cisco-ai-defense/skill-scanner](https://github.com/cisco-ai-defense/skill-scanner) for prompt injection, data exfiltration, tool-chaining abuse, and supply chain risk. CI fails on HIGH+ findings and uploads SARIF reports to GitHub Code Scanning. The repo policy in [`scan-policy.yaml`](scan-policy.yaml) tunes the scanner for security-focused content.
+Every skill in this repo is scanned with [NVIDIA SkillSpector](https://github.com/NVIDIA/SkillSpector) for prompt injection, data exfiltration, tool-chaining abuse, and supply chain risk. CI runs SkillSpector in static mode (`--no-llm`) for deterministic scans without provider API keys, uploads SARIF reports to GitHub Code Scanning, and reports findings. Because security-focused capabilities intentionally contain offensive security content, the workflow currently reports findings without blocking merges while thresholds are tuned.
 
 ```bash
 just security-scan                    # scan all capabilities
 just security-scan web-security       # scan one capability
-just security-scan behavioral="true"  # deep dataflow analysis
+just security-scan behavioral="true"  # ignored by SkillSpector; kept for compatibility
 ```
 
+> **Note:** SkillSpector is not yet published to PyPI. The scanner is installed from `git+https://github.com/NVIDIA/SkillSpector` on each run; uv caches the build aggressively.
+
 ## Contributing
 
 This repo is published for reference, not as a contribution target — we don't generally accept external PRs that add new capabilities. See [CONTRIBUTING.md](CONTRIBUTING.md) for what's useful to send and how to build your own capabilities instead.
diff --git a/justfile b/justfile
index b1335dc..f970479 100644
--- a/justfile
+++ b/justfile
@@ -14,12 +14,14 @@ validate strict="false":
     [[ "{{ strict }}" == "true" ]] && cmd+=(--strict)
     "${cmd[@]}"
 
-# Security scan all skills (pass capability to scan one, behavioral="true" for deep analysis)
+# Security scan all skills (pass capability to scan one)
+# Note: behavioral="true" is a no-op kept for compatibility; SkillSpector uses
+# --no-llm static analysis by default. Pass --llm to scripts/security-scan.sh
+# to enable LLM semantic analysis (requires API keys).
 security-scan capability="" behavioral="false":
     #!/usr/bin/env bash
     set -euo pipefail
     cmd=(./scripts/security-scan.sh)
-    [[ "{{ behavioral }}" == "true" ]] && cmd+=(--behavioral)
     [[ -n "{{ capability }}" ]] && cmd+=("{{ capability }}")
     "${cmd[@]}"
 
diff --git a/scan-policy.yaml b/scan-policy.yaml.cisco-legacy
similarity index 100%
rename from scan-policy.yaml
rename to scan-policy.yaml.cisco-legacy
diff --git a/scripts/security-scan.sh b/scripts/security-scan.sh
index d3e8309..09a4df4 100755
--- a/scripts/security-scan.sh
+++ b/scripts/security-scan.sh
@@ -1,21 +1,24 @@
 #!/usr/bin/env bash
-# security-scan.sh — Run cisco-ai-skill-scanner across all capabilities
+# security-scan.sh — Run NVIDIA SkillSpector across all capabilities
 #
 # Usage:
 #   ./scripts/security-scan.sh                      # scan all capabilities, summary
 #   ./scripts/security-scan.sh web-security         # scan one capability
 #   ./scripts/security-scan.sh --format json        # JSON output
-#   ./scripts/security-scan.sh --ci                 # CI mode: SARIF + fail on high
-#   ./scripts/security-scan.sh --behavioral         # enable behavioral analysis
+#   ./scripts/security-scan.sh --sarif FILE         # SARIF output
 #
 # Requires: uv (https://docs.astral.sh/uv/)
-# Package:  cisco-ai-skill-scanner (installed automatically via uvx)
+# Package:  skillspector (installed from git, see pyproject.toml)
+#
+# Note: SkillSpector is not yet on PyPI. We install from the public
+# GitHub repo. Use --no-llm in CI to keep scans deterministic and avoid
+# needing provider API keys.
 
 set -euo pipefail
 
 REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
-POLICY="${REPO_ROOT}/scan-policy.yaml"
-SCANNER="uvx --from cisco-ai-skill-scanner skill-scanner"
+# SkillSpector is not published to PyPI yet; install from git.
+SCANNER="uvx --from git+https://github.com/NVIDIA/SkillSpector skillspector"
 
 # Auto-discover capability directories under capabilities/
 CAPABILITY_DIRS=()
@@ -27,28 +30,24 @@ for dir in "${REPO_ROOT}"/capabilities/*/; do
 done
 
 # Defaults
-FORMAT="summary"
-CI_MODE=false
-USE_BEHAVIORAL=false
-FAIL_SEVERITY=""
+FORMAT="terminal"
+TARGET_CAPABILITY=""
 OUTPUT_SARIF=""
 OUTPUT_JSON=""
-TARGET_CAPABILITY=""
+NO_LLM=true
 EXTRA_ARGS=()
 
 usage() {
     cat <<EOF
 Usage: $(basename "$0") [OPTIONS] [CAPABILITY]
 
-Scan capabilities for security issues using cisco-ai-skill-scanner.
+Scan capabilities for security issues using NVIDIA SkillSpector.
 
 Options:
-  --ci              CI mode: produce SARIF, fail on high+ severity
-  --format FMT      Output format: summary|json|markdown|table|sarif|html
-  --behavioral      Enable behavioral dataflow analysis (slower, deeper)
-  --fail-on SEV     Fail if findings >= severity (critical|high|medium|low)
+  --format FMT      Output format: terminal|json|markdown|sarif [default: terminal]
   --sarif FILE      Write SARIF report to FILE
   --json FILE       Write JSON report to FILE
+  --llm             Enable LLM semantic analysis (requires API keys)
   -h, --help        Show this help
 
 Arguments:
@@ -57,40 +56,31 @@ Arguments:
 Examples:
   $(basename "$0")                    # scan everything, summary output
   $(basename "$0") web-security       # scan one capability
-  $(basename "$0") --ci               # CI pipeline mode
-  $(basename "$0") --behavioral       # deep analysis
+  $(basename "$0") --format sarif --sarif report.sarif
 EOF
     exit 0
 }
 
 while [[ $# -gt 0 ]]; do
     case "$1" in
-        --ci)
-            CI_MODE=true
-            FORMAT="summary"
-            FAIL_SEVERITY="high"
-            shift
-            ;;
         --format)
             FORMAT="$2"
             shift 2
             ;;
-        --behavioral)
-            USE_BEHAVIORAL=true
-            shift
-            ;;
-        --fail-on)
-            FAIL_SEVERITY="$2"
-            shift 2
-            ;;
         --sarif)
             OUTPUT_SARIF="$2"
+            FORMAT="sarif"
             shift 2
             ;;
         --json)
             OUTPUT_JSON="$2"
+            FORMAT="json"
             shift 2
             ;;
+        --llm)
+            NO_LLM=false
+            shift
+            ;;
         -h|--help)
             usage
             ;;
@@ -117,25 +107,16 @@ fi
 # Build scanner command
 build_cmd() {
     local cap_dir="$1"
-    local cmd=(${SCANNER} scan-all "${REPO_ROOT}/capabilities/${cap_dir}")
-    cmd+=(--recursive --lenient)
-    cmd+=(--policy "${POLICY}")
+    local output_path="$2"
+    local cmd=(${SCANNER} scan "${REPO_ROOT}/capabilities/${cap_dir}")
     cmd+=(--format "${FORMAT}")
 
-    if [[ "${USE_BEHAVIORAL}" == true ]]; then
-        cmd+=(--use-behavioral)
+    if [[ "${NO_LLM}" == true ]]; then
+        cmd+=(--no-llm)
     fi
 
-    if [[ -n "${FAIL_SEVERITY}" ]]; then
-        cmd+=(--fail-on-severity "${FAIL_SEVERITY}")
-    fi
-
-    if [[ -n "${OUTPUT_SARIF}" ]]; then
-        cmd+=(--output-sarif "${OUTPUT_SARIF}")
-    fi
-
-    if [[ -n "${OUTPUT_JSON}" ]]; then
-        cmd+=(--output-json "${OUTPUT_JSON}")
+    if [[ -n "${output_path}" ]]; then
+        cmd+=(--output "${output_path}")
     fi
 
     cmd+=("${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}")
@@ -144,8 +125,6 @@ build_cmd() {
 }
 
 # Run scans
-overall_exit=0
-
 for cap_dir in "${CAPABILITY_DIRS[@]}"; do
     if [[ ! -d "${REPO_ROOT}/capabilities/${cap_dir}" ]]; then
         continue
@@ -160,45 +139,23 @@ for cap_dir in "${CAPABILITY_DIRS[@]}"; do
 
     echo "==> Scanning ${cap_dir}/ (${skill_count} skills)"
 
-    cmd=$(build_cmd "${cap_dir}")
-
-    if [[ "${CI_MODE}" == true ]]; then
-        # In CI mode, capture SARIF per-capability and merge later
-        sarif_file="${REPO_ROOT}/.security-scan-${cap_dir}.sarif"
-        cmd="${cmd} --output-sarif ${sarif_file}"
+    output_path=""
+    if [[ -n "${OUTPUT_SARIF}" ]]; then
+        output_path="${OUTPUT_SARIF}"
+    elif [[ -n "${OUTPUT_JSON}" ]]; then
+        output_path="${OUTPUT_JSON}"
     fi
 
+    cmd=$(build_cmd "${cap_dir}" "${output_path}")
+
+    # SkillSpector exits 1 when risk_score > 50. Security-focused
+    # capabilities often score high, so we report findings but do not
+    # fail the wrapper by default. CI can decide whether to gate merges.
     if eval "${cmd}"; then
-        echo "    ✓ ${cap_dir}/ passed"
+        echo "    ✓ ${cap_dir}/ scan completed"
     else
         exit_code=$?
-        echo "    ✗ ${cap_dir}/ has findings (exit ${exit_code})"
-        overall_exit=1
+        echo "    ⚠ ${cap_dir}/ scan completed with findings (exit ${exit_code})"
     fi
     echo ""
 done
-
-# CI summary
-if [[ "${CI_MODE}" == true ]]; then
-    sarif_files=()
-    for cap_dir in "${CAPABILITY_DIRS[@]}"; do
-        f="${REPO_ROOT}/.security-scan-${cap_dir}.sarif"
-        if [[ -f "${f}" ]]; then
-            sarif_files+=("${f}")
-        fi
-    done
-
-    if [[ ${#sarif_files[@]} -gt 0 ]]; then
-        if [[ -n "${OUTPUT_SARIF}" ]]; then
-            cp "${sarif_files[-1]}" "${OUTPUT_SARIF}"
-        fi
-        echo "SARIF reports: ${sarif_files[*]}"
-    fi
-fi
-
-if [[ "${overall_exit}" -eq 0 ]]; then
-    echo "All scans passed."
-else
-    echo "Security scan found issues above threshold."
-    exit 1
-fi
diff --git a/scripts/test_security_scan.sh b/scripts/test_security_scan.sh
index 8004fac..654235b 100755
--- a/scripts/test_security_scan.sh
+++ b/scripts/test_security_scan.sh
@@ -1,14 +1,11 @@
 #!/usr/bin/env bash
-# test_security_scan.sh — Integration tests for the security scanning setup.
+# test_security_scan.sh — Integration tests for the SkillSpector scanning setup.
 #
 # Verifies that:
-#   1. The scanner is installable and runnable
-#   2. The custom policy loads without errors
-#   3. Individual skill scans produce valid output
-#   4. Batch scanning works across capabilities
-#   5. CI mode produces SARIF output
-#   6. The --fail-on-severity flag works correctly
-#   7. A deliberately malicious skill is caught
+#   1. SkillSpector is installable and runnable from git
+#   2. Individual skill scans produce valid output
+#   3. SARIF output is generated and valid
+#   4. A deliberately malicious skill scores higher than a clean one
 #
 # Usage:
 #   ./scripts/test_security_scan.sh         # run all tests
@@ -19,7 +16,7 @@
 set -euo pipefail
 
 REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
-SCANNER="uvx --from cisco-ai-skill-scanner skill-scanner"
+SCANNER="uvx --from git+https://github.com/NVIDIA/SkillSpector skillspector"
 PASS=0
 FAIL=0
 VERBOSE=false
@@ -53,15 +50,6 @@ fail() {
     fi
 }
 
-assert_eq() {
-    local desc="$1" expected="$2" actual="$3"
-    if [[ "${expected}" == "${actual}" ]]; then
-        pass "${desc}"
-    else
-        fail "${desc}" "expected='${expected}' actual='${actual}'"
-    fi
-}
-
 assert_contains() {
     local desc="$1" haystack="$2" needle="$3"
     if echo "${haystack}" | grep -qF -- "${needle}"; then
@@ -80,192 +68,97 @@ assert_file_exists() {
     fi
 }
 
-assert_exit_code() {
-    local desc="$1" expected="$2"
-    shift 2
-    local actual=0
-    "$@" > /dev/null 2>&1 || actual=$?
-    if [[ "${expected}" -eq "${actual}" ]]; then
+assert_json_field_gt() {
+    local desc="$1" file="$2" jpath="$3" min="$4"
+    local actual
+    actual=$(jq -r "${jpath} // 0" "${file}")
+    if [[ "$(echo "${actual} > ${min}" | bc -l)" == "1" ]]; then
         pass "${desc}"
     else
-        fail "${desc}" "expected exit ${expected}, got ${actual}"
+        fail "${desc}" "expected ${jpath} > ${min}, got ${actual}"
     fi
 }
 
 # --- Tests ---------------------------------------------------------------
 
-echo "=== Security Scan Tests ==="
+echo "=== Security Scan Tests (SkillSpector) ==="
 echo ""
 setup_tmpdir
 
 # 1. Scanner is available
 echo "[1] Scanner availability"
-if ${SCANNER} --version > /dev/null 2>&1; then
-    pass "skill-scanner is available via uvx"
+version_out=$(${SCANNER} --version 2>&1) || true
+if echo "${version_out}" | grep -q "SkillSpector"; then
+    pass "SkillSpector is available via uvx from git"
 else
-    fail "skill-scanner is not available"
-    echo "FATAL: Cannot continue without scanner. Install with: pip install cisco-ai-skill-scanner"
+    fail "SkillSpector is not available"
+    echo "FATAL: Cannot continue without scanner."
     exit 1
 fi
 
-# 2. Custom policy loads
-echo ""
-echo "[2] Custom policy"
-output=$(${SCANNER} scan-all "${REPO_ROOT}/capabilities/ai-red-teaming" \
-    --recursive --lenient \
-    --policy "${REPO_ROOT}/scan-policy.yaml" \
-    --format json --compact 2>&1) || true
-assert_contains "policy loads without errors" "${output}" '"summary"'
-
-# Extract policy name from output
-policy_name=$(echo "${output}" | python3 -c "
-import json, sys
-try:
-    data = json.load(sys.stdin)
-    r = data.get('results', [{}])[0]
-    print(r.get('scan_metadata', {}).get('policy_name', ''))
-except: print('')
-" 2>/dev/null) || true
-assert_eq "policy name is 'capabilities'" "capabilities" "${policy_name}"
-
-# 3. Individual skill scan
+# 2. Individual skill scan produces JSON
 echo ""
-echo "[3] Individual skill scan"
-# Find a real skill to test
+echo "[2] Individual skill scan"
 skill_dir=$(find "${REPO_ROOT}/capabilities/ai-red-teaming" -name "SKILL.md" -type f -print -quit 2>/dev/null | xargs dirname)
 if [[ -n "${skill_dir}" ]]; then
-    scan_out=$(${SCANNER} scan "${skill_dir}" \
-        --lenient --policy "${REPO_ROOT}/scan-policy.yaml" \
-        --format json --compact 2>&1)
-    assert_contains "scan produces valid JSON" "${scan_out}" '"skill_name"'
-    assert_contains "scan includes is_safe field" "${scan_out}" '"is_safe"'
-    assert_contains "scan includes analyzers_used" "${scan_out}" '"analyzers_used"'
-
-    # Verify static + bytecode + pipeline analyzers ran
-    analyzers=$(echo "${scan_out}" | python3 -c "
-import json, sys
-try:
-    data = json.load(sys.stdin)
-    print(' '.join(data.get('analyzers_used', [])))
-except: print('')
-" 2>/dev/null) || true
-    assert_contains "static analyzer ran" "${analyzers}" "static"
-    assert_contains "pipeline analyzer ran" "${analyzers}" "pipeline"
+    scan_out=$(${SCANNER} scan "${skill_dir}" --format json --no-llm 2>&1)
+    assert_contains "scan produces JSON output" "${scan_out}" '"risk_assessment"'
+    assert_contains "scan includes issues array" "${scan_out}" '"issues"'
 else
     fail "no skills found to test"
 fi
 
-# 4. Batch scan produces summary
-echo ""
-echo "[4] Batch scanning"
-batch_out=$(${SCANNER} scan-all "${REPO_ROOT}/capabilities/ai-red-teaming" \
-    --recursive --lenient \
-    --policy "${REPO_ROOT}/scan-policy.yaml" \
-    --format json --compact 2>&1)
-total=$(echo "${batch_out}" | python3 -c "
-import json, sys
-try:
-    data = json.load(sys.stdin)
-    print(data.get('summary', {}).get('total_skills_scanned', 0))
-except: print(0)
-" 2>/dev/null) || true
-
-if [[ "${total}" -gt 0 ]]; then
-    pass "batch scan found ${total} skills"
-else
-    fail "batch scan found no skills"
-fi
-
-# 5. SARIF output
+# 3. SARIF output
 echo ""
-echo "[5] SARIF output"
+echo "[3] SARIF output"
 sarif_file="${TMPDIR_BASE}/test.sarif"
-${SCANNER} scan-all "${REPO_ROOT}/capabilities/ai-red-teaming" \
-    --recursive --lenient \
-    --policy "${REPO_ROOT}/scan-policy.yaml" \
-    --format sarif \
-    --output-sarif "${sarif_file}" 2>/dev/null || true
+${SCANNER} scan "${skill_dir}" --format sarif --no-llm --output "${sarif_file}" 2>/dev/null || true
 assert_file_exists "SARIF file created" "${sarif_file}"
 
 if [[ -f "${sarif_file}" ]]; then
-    sarif_version=$(python3 -c "
-import json
-with open('${sarif_file}') as f:
-    data = json.load(f)
-print(data.get('version', ''))
-" 2>/dev/null) || true
-    assert_eq "SARIF version is 2.1.0" "2.1.0" "${sarif_version}"
+    sarif_version=$(jq -r '.version' "${sarif_file}" 2>/dev/null || true)
+    assert_contains "SARIF version is 2.1.0" "${sarif_version}" "2.1.0"
 
-    sarif_tool=$(python3 -c "
-import json
-with open('${sarif_file}') as f:
-    data = json.load(f)
-print(data.get('runs', [{}])[0].get('tool', {}).get('driver', {}).get('name', ''))
-" 2>/dev/null) || true
-    assert_eq "SARIF tool is skill-scanner" "skill-scanner" "${sarif_tool}"
+    sarif_tool=$(jq -r '.runs[0].tool.driver.name' "${sarif_file}" 2>/dev/null || true)
+    assert_contains "SARIF tool is skillspector" "${sarif_tool}" "skillspector"
 fi
 
-# 6. Fail-on-severity flag
+# 4. Malicious skill scores higher than clean skill
 echo ""
-echo "[6] Severity threshold"
-# Use pre-built malicious skill fixture
-malicious_dir="${REPO_ROOT}/scripts/fixtures/malicious-skill"
-
-# This should find issues (redirect stderr to avoid warnings corrupting JSON)
-malicious_out=$(${SCANNER} scan "${malicious_dir}" \
-    --lenient --format json --compact 2>/dev/null) || true
-is_safe=$(echo "${malicious_out}" | python3 -c "
-import json, sys
-try:
-    data = json.load(sys.stdin)
-    print(str(data.get('is_safe', True)).lower())
-except: print('true')
-" 2>/dev/null) || true
-assert_eq "malicious skill detected as unsafe" "false" "${is_safe}"
-
-findings_count=$(echo "${malicious_out}" | python3 -c "
-import json, sys
-try:
-    data = json.load(sys.stdin)
-    print(data.get('findings_count', 0))
-except: print(0)
-" 2>/dev/null) || true
-
-if [[ "${findings_count}" -gt 0 ]]; then
-    pass "malicious skill produced ${findings_count} findings"
-else
-    fail "malicious skill produced no findings"
-fi
+echo "[4] Malicious vs clean skill detection"
+malicious_json="${TMPDIR_BASE}/malicious.json"
+clean_json="${TMPDIR_BASE}/clean.json"
+
+${SCANNER} scan "${REPO_ROOT}/scripts/fixtures/malicious-skill" \
+  --format json --no-llm --output "${malicious_json}" 2>/dev/null || true
+${SCANNER} scan "${REPO_ROOT}/scripts/fixtures/clean-skill" \
+  --format json --no-llm --output "${clean_json}" 2>/dev/null || true
+
+assert_file_exists "malicious skill JSON report created" "${malicious_json}"
+assert_file_exists "clean skill JSON report created" "${clean_json}"
+
+if [[ -f "${malicious_json}" && -f "${clean_json}" ]]; then
+    assert_json_field_gt "malicious skill has higher risk score than clean" \
+      "${malicious_json}" '.risk_assessment.score' '0'
+
+    clean_score=$(jq -r '.risk_assessment.score // 0' "${clean_json}")
+    malicious_score=$(jq -r '.risk_assessment.score // 0' "${malicious_json}")
+    if [[ "${malicious_score}" -gt "${clean_score}" ]]; then
+        pass "malicious skill (${malicious_score}) scores higher than clean (${clean_score})"
+    else
+        fail "malicious skill (${malicious_score}) did not score higher than clean (${clean_score})"
+    fi
 
-# --fail-on-severity should return non-zero for malicious content
-set +e
-${SCANNER} scan "${malicious_dir}" \
-    --lenient --format json --compact \
-    --fail-on-severity medium > /dev/null 2>&1
-exit_code=$?
-set -e
-if [[ "${exit_code}" -ne 0 ]]; then
-    pass "--fail-on-severity returns non-zero for malicious skill"
-else
-    fail "--fail-on-severity returned 0 for malicious skill" "exit code: ${exit_code}"
+    if [[ "${malicious_score}" -gt 50 ]]; then
+        pass "malicious skill exceeds risk threshold (>50)"
+    else
+        fail "malicious skill did not exceed risk threshold" "score=${malicious_score}"
+    fi
 fi
 
-# Clean skill fixture should pass
-clean_dir="${REPO_ROOT}/scripts/fixtures/clean-skill"
-clean_out=$(${SCANNER} scan "${clean_dir}" \
-    --lenient --format json --compact 2>&1) || true
-clean_safe=$(echo "${clean_out}" | python3 -c "
-import json, sys
-try:
-    data = json.load(sys.stdin)
-    print(str(data.get('is_safe', False)).lower())
-except: print('false')
-" 2>/dev/null) || true
-assert_eq "clean skill detected as safe" "true" "${clean_safe}"
-
-# 7. Script wrapper
+# 5. Script wrapper
 echo ""
-echo "[7] Script wrapper"
+echo "[5] Script wrapper"
 assert_file_exists "security-scan.sh exists" "${REPO_ROOT}/scripts/security-scan.sh"
 if [[ -x "${REPO_ROOT}/scripts/security-scan.sh" ]]; then
     pass "security-scan.sh is executable"
@@ -273,24 +166,13 @@ else
     fail "security-scan.sh is not executable"
 fi
 
-# Test --help
 help_out=$("${REPO_ROOT}/scripts/security-scan.sh" --help 2>&1) || true
 assert_contains "help shows usage" "${help_out}" "Usage"
-assert_contains "help shows --ci flag" "${help_out}" "--ci"
+assert_contains "help shows --format flag" "${help_out}" "--format"
 
-# Test wrapper runs successfully on a single capability
 wrapper_out=$("${REPO_ROOT}/scripts/security-scan.sh" ai-red-teaming 2>&1) || true
 assert_contains "wrapper scans ai-red-teaming" "${wrapper_out}" "ai-red-teaming"
 
-# 8. Behavioral analysis
-echo ""
-echo "[8] Behavioral analysis"
-behavioral_out=$(${SCANNER} scan "${skill_dir}" \
-    --lenient --use-behavioral \
-    --policy "${REPO_ROOT}/scan-policy.yaml" \
-    --format json --compact 2>&1) || true
-assert_contains "behavioral analyzer available" "${behavioral_out}" "behavioral"
-
 # --- Summary -------------------------------------------------------------
 echo ""
 echo "=== Results: ${PASS} passed, ${FAIL} failed ==="