Skip to content

Commit ab7f6a2

Browse files
committed
fix(sandbox): resolve symlinked binary paths in network policy matching
Policy binary paths specified as symlinks (e.g., /usr/bin/python3) were silently denied because the kernel reports the canonical path via /proc/<pid>/exe (e.g., /usr/bin/python3.11), so the strict string equality check in Rego never matched.

Expand policy binary paths by resolving symlinks through the container filesystem (/proc/<pid>/root/) after the entrypoint starts. The OPA data now contains both the original and resolved paths, so Rego's existing strict equality check naturally matches either.

- Add resolve_binary_in_container() helper for Linux symlink resolution
- Add from_proto_with_pid() and reload_from_proto_with_pid() to OpaEngine
- Trigger one-shot OPA rebuild after entrypoint_pid is stored
- Thread entrypoint_pid through run_policy_poll_loop for hot-reloads
- Improve deny reason with symlink debugging hint
- Add 18 new tests including hot-reload and Linux symlink e2e tests

Closes #770
1 parent 13051df commit ab7f6a2

3 files changed

Lines changed: 712 additions & 13 deletions

File tree

crates/openshell-sandbox/data/sandbox-policy.rego

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ deny_reason := reason if {
4747
policy := data.network_policies[name]
4848
endpoint_allowed(policy, input.network)
4949
not binary_allowed(policy, input.exec)
50-
r := sprintf("binary '%s' (ancestors: [%s], cmdline: [%s]) not allowed in policy '%s'", [input.exec.path, ancestors_str, cmdline_str, name])
50+
r := sprintf("binary '%s' (ancestors: [%s], cmdline: [%s]) not allowed in policy '%s' (hint: binary path is kernel-resolved via /proc/<pid>/exe; if you specified a symlink like /usr/bin/python3, the actual binary may be /usr/bin/python3.11)", [input.exec.path, ancestors_str, cmdline_str, name])
5151
]
5252
all_reasons := array.concat(endpoint_misses, binary_misses)
5353
count(all_reasons) > 0

crates/openshell-sandbox/src/lib.rs

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ pub async fn run_sandbox(
247247
// Load policy and initialize OPA engine
248248
let openshell_endpoint_for_proxy = openshell_endpoint.clone();
249249
let sandbox_name_for_agg = sandbox.clone();
250-
let (policy, opa_engine) = load_policy(
250+
let (policy, opa_engine, retained_proto) = load_policy(
251251
sandbox_id.clone(),
252252
sandbox,
253253
openshell_endpoint.clone(),
@@ -714,6 +714,25 @@ pub async fn run_sandbox(
714714
.build()
715715
);
716716

717+
// Resolve policy binary symlinks now that the container filesystem is
718+
// accessible via /proc/<pid>/root/. This expands symlinks like
719+
// /usr/bin/python3 → /usr/bin/python3.11 in the OPA policy data so that
720+
// either path matches at evaluation time.
721+
if let (Some(engine), Some(proto)) = (&opa_engine, &retained_proto) {
722+
let pid = handle.pid();
723+
if let Err(e) = engine.reload_from_proto_with_pid(proto, pid) {
724+
warn!(
725+
error = %e,
726+
"Failed to resolve binary symlinks in policy (non-fatal)"
727+
);
728+
} else {
729+
info!(
730+
pid = pid,
731+
"Resolved policy binary symlinks via container filesystem"
732+
);
733+
}
734+
}
735+
717736
// Spawn background policy poll task (gRPC mode only).
718737
if let (Some(id), Some(endpoint), Some(engine)) =
719738
(&sandbox_id, &openshell_endpoint, &opa_engine)
@@ -722,6 +741,7 @@ pub async fn run_sandbox(
722741
let poll_endpoint = endpoint.clone();
723742
let poll_engine = engine.clone();
724743
let poll_ocsf_enabled = ocsf_enabled.clone();
744+
let poll_pid = entrypoint_pid.clone();
725745
let poll_interval_secs: u64 = std::env::var("OPENSHELL_POLICY_POLL_INTERVAL_SECS")
726746
.ok()
727747
.and_then(|v| v.parse().ok())
@@ -732,6 +752,7 @@ pub async fn run_sandbox(
732752
&poll_endpoint,
733753
&poll_id,
734754
&poll_engine,
755+
&poll_pid,
735756
poll_interval_secs,
736757
&poll_ocsf_enabled,
737758
)
@@ -1426,13 +1447,21 @@ mod baseline_tests {
14261447
/// 2. If `sandbox_id` and `openshell_endpoint` are provided, fetch via gRPC
14271448
/// 3. If the server returns no policy, discover from disk or use restrictive default
14281449
/// 4. Otherwise, return an error
1450+
///
1451+
/// Returns the policy, the OPA engine, and (for gRPC mode) the original proto
1452+
/// policy. The proto is retained so the OPA engine can be rebuilt with symlink
1453+
/// resolution after the container entrypoint starts.
14291454
async fn load_policy(
14301455
sandbox_id: Option<String>,
14311456
sandbox: Option<String>,
14321457
openshell_endpoint: Option<String>,
14331458
policy_rules: Option<String>,
14341459
policy_data: Option<String>,
1435-
) -> Result<(SandboxPolicy, Option<Arc<OpaEngine>>)> {
1460+
) -> Result<(
1461+
SandboxPolicy,
1462+
Option<Arc<OpaEngine>>,
1463+
Option<openshell_core::proto::SandboxPolicy>,
1464+
)> {
14361465
// File mode: load OPA engine from rego rules + YAML data (dev override)
14371466
if let (Some(policy_file), Some(data_file)) = (&policy_rules, &policy_data) {
14381467
ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx())
@@ -1461,7 +1490,7 @@ async fn load_policy(
14611490
process: config.process,
14621491
};
14631492
enrich_sandbox_baseline_paths(&mut policy);
1464-
return Ok((policy, Some(Arc::new(engine))));
1493+
return Ok((policy, Some(Arc::new(engine)), None));
14651494
}
14661495

14671496
// gRPC mode: fetch typed proto policy, construct OPA engine from baked rules + proto data
@@ -1524,11 +1553,14 @@ async fn load_policy(
15241553
// Build OPA engine from baked-in rules + typed proto data.
15251554
// In cluster mode, proxy networking is always enabled so OPA is
15261555
// always required for allow/deny decisions.
1556+
// The initial load uses pid=0 (no symlink resolution) because the
1557+
// container hasn't started yet. After the entrypoint spawns, the
1558+
// engine is rebuilt with the real PID for symlink resolution.
15271559
info!("Creating OPA engine from proto policy data");
15281560
let opa_engine = Some(Arc::new(OpaEngine::from_proto(&proto_policy)?));
15291561

1530-
let policy = SandboxPolicy::try_from(proto_policy)?;
1531-
return Ok((policy, opa_engine));
1562+
let policy = SandboxPolicy::try_from(proto_policy.clone())?;
1563+
return Ok((policy, opa_engine, Some(proto_policy)));
15321564
}
15331565

15341566
// No policy source available
@@ -1838,12 +1870,16 @@ async fn flush_proposals_to_gateway(
18381870
Ok(())
18391871
}
18401872

1841-
/// `reload_from_proto()`. Reports load success/failure back to the server.
1842-
/// On failure, the previous engine is untouched (LKG behavior).
1873+
/// `reload_from_proto_with_pid()`. Reports load success/failure back to the
1874+
/// server. On failure, the previous engine is untouched (LKG behavior).
1875+
///
1876+
/// When the entrypoint PID is available, policy reloads include symlink
1877+
/// resolution for binary paths via the container filesystem.
18431878
async fn run_policy_poll_loop(
18441879
endpoint: &str,
18451880
sandbox_id: &str,
18461881
opa_engine: &Arc<OpaEngine>,
1882+
entrypoint_pid: &Arc<AtomicU32>,
18471883
interval_secs: u64,
18481884
ocsf_enabled: &std::sync::atomic::AtomicBool,
18491885
) -> Result<()> {
@@ -1924,7 +1960,8 @@ async fn run_policy_poll_loop(
19241960
continue;
19251961
};
19261962

1927-
match opa_engine.reload_from_proto(policy) {
1963+
let pid = entrypoint_pid.load(Ordering::Acquire);
1964+
match opa_engine.reload_from_proto_with_pid(policy, pid) {
19281965
Ok(()) => {
19291966
if result.global_policy_version > 0 {
19301967
ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx())

0 commit comments

Comments
 (0)