From fc404a7a4ed53981a37efe4b14030c0cce8405ce Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 12:42:21 +0800 Subject: [PATCH 01/24] feat(agent): add ai agent enforcement config --- agent/src/config/config.rs | 76 ++++++++++++++++ agent/src/config/handler.rs | 38 +++++++- server/agent_config/README-CH.md | 146 +++++++++++++++++++++++++++++- server/agent_config/README.md | 146 +++++++++++++++++++++++++++++- server/agent_config/template.yaml | 63 +++++++++++++ 5 files changed, 463 insertions(+), 6 deletions(-) diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 9da801c8356..76f49642eff 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -607,6 +607,7 @@ pub struct AiAgentConfig { pub http_endpoints: Vec, pub max_payload_size: usize, pub file_io_enabled: bool, + pub enforcement: AiAgentEnforcementConfig, } impl Default for AiAgentConfig { @@ -619,10 +620,85 @@ impl Default for AiAgentConfig { ], max_payload_size: 0, // 0 means unlimited file_io_enabled: true, + enforcement: AiAgentEnforcementConfig::default(), } } } +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentEnforcementConfig { + pub enabled: bool, + pub mode: String, + pub default_fallback: String, + pub max_rules: usize, + pub rules: Vec, +} + +impl Default for AiAgentEnforcementConfig { + fn default() -> Self { + Self { + enabled: false, + mode: "audit_only".to_string(), + default_fallback: "sigkill".to_string(), + max_rules: 256, + rules: Vec::new(), + } + } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentEnforcementRule { + pub id: String, + pub description: String, + pub scope: String, + pub target_type: String, + pub action: AiAgentEnforcementAction, + pub audit: bool, + pub exec: AiAgentExecMatch, +} + +impl Default for AiAgentEnforcementRule { + fn default() -> Self { + Self { + id: String::new(), + description: String::new(), + scope: 
"ai_agent_tree".to_string(), + target_type: "exec".to_string(), + action: AiAgentEnforcementAction::default(), + audit: true, + exec: AiAgentExecMatch::default(), + } + } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentEnforcementAction { + #[serde(rename = "type")] + pub action_type: String, + pub errno: String, +} + +impl Default for AiAgentEnforcementAction { + fn default() -> Self { + Self { + action_type: "deny".to_string(), + errno: "EPERM".to_string(), + } + } +} + +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentExecMatch { + pub exact: Vec, + pub prefix: Vec, + pub suffix: Vec, + pub argv_contains_any: Vec, +} + #[derive(Clone, Debug, Deserialize, PartialEq, Eq)] #[serde(default)] pub struct Proc { diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index f3b14ef8b37..83f3d2c828b 100644 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -58,10 +58,11 @@ use tokio::runtime::Runtime; use super::config::{Ebpf, EbpfFileIoEvent, ProcessMatcher, SymbolTable}; use super::{ config::{ - ApiResources, Config, DpdkSource, ExtraLogFields, ExtraLogFieldsInfo, HttpEndpoint, - HttpEndpointMatchRule, Iso8583ParseConfig, NetSignParseConfig, OracleConfig, PcapStream, - PortConfig, ProcessorsFlowLogTunning, RequestLogTunning, SessionTimeout, TagFilterOperator, - Timeouts, UserConfig, WebSphereMqParseConfig, GRPC_BUFFER_SIZE_MIN, + AiAgentEnforcementConfig, ApiResources, Config, DpdkSource, ExtraLogFields, + ExtraLogFieldsInfo, HttpEndpoint, HttpEndpointMatchRule, Iso8583ParseConfig, + NetSignParseConfig, OracleConfig, PcapStream, PortConfig, ProcessorsFlowLogTunning, + RequestLogTunning, SessionTimeout, TagFilterOperator, Timeouts, UserConfig, + WebSphereMqParseConfig, GRPC_BUFFER_SIZE_MIN, }, ConfigError, KubernetesPollerType, TrafficOverflowAction, }; @@ -1213,6 +1214,7 @@ pub struct LogParserConfig { pub ai_agent_endpoints: Vec, pub 
ai_agent_max_payload_size: usize, pub ai_agent_file_io_enabled: bool, + pub ai_agent_enforcement: AiAgentEnforcementConfig, } impl Default for LogParserConfig { @@ -1240,6 +1242,7 @@ impl Default for LogParserConfig { ], ai_agent_max_payload_size: usize::MAX, // default: unlimited (config 0 → usize::MAX) ai_agent_file_io_enabled: true, + ai_agent_enforcement: AiAgentEnforcementConfig::default(), } } } @@ -1290,6 +1293,7 @@ impl fmt::Debug for LogParserConfig { .field("ai_agent_endpoints", &self.ai_agent_endpoints) .field("ai_agent_max_payload_size", &self.ai_agent_max_payload_size) .field("ai_agent_file_io_enabled", &self.ai_agent_file_io_enabled) + .field("ai_agent_enforcement", &self.ai_agent_enforcement) .finish() } } @@ -2426,6 +2430,7 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig { conf.inputs.proc.ai_agent.max_payload_size }, ai_agent_file_io_enabled: conf.inputs.proc.ai_agent.file_io_enabled, + ai_agent_enforcement: conf.inputs.proc.ai_agent.enforcement.clone(), }, debug: DebugConfig { agent_id: conf.global.common.agent_id as u16, @@ -6176,6 +6181,31 @@ mod tests { ); } + #[test] + fn test_ai_agent_enforcement_defaults_disabled() { + let config = LogParserConfig::default(); + assert!(!config.ai_agent_enforcement.enabled); + assert_eq!(config.ai_agent_enforcement.mode.as_str(), "audit_only"); + assert!(config.ai_agent_enforcement.rules.is_empty()); + } + + #[test] + fn test_log_parser_debug_includes_ai_agent_enforcement() { + let mut config = LogParserConfig::default(); + config.ai_agent_enforcement.enabled = true; + config.ai_agent_enforcement.mode = "block".to_string(); + + let debug = format!("{config:?}"); + assert!( + debug.contains("ai_agent_enforcement"), + "debug output missing ai_agent_enforcement: {debug}" + ); + assert!( + debug.contains("block"), + "debug output missing ai_agent enforcement mode: {debug}" + ); + } + #[cfg(any(target_os = "linux", target_os = "android"))] fn available_cpu_ids(cpu_set: &CpuSet) -> Vec { 
(0..libc::CPU_SETSIZE as usize) diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index c41ce86d56b..e87288cacc9 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -2360,6 +2360,151 @@ inputs: 是否开启 AI Agent 文件 IO 事件采集。 +#### 执行阻断 {#inputs.proc.ai_agent.enforcement} + +AI Agent 命令执行阻断。第一版仅支持 exec 命令审计/阻断。 + +##### 开启执行阻断 {#inputs.proc.ai_agent.enforcement.enabled} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.enabled` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: false +``` + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | bool | + +##### 模式 {#inputs.proc.ai_agent.enforcement.mode} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.mode` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + mode: audit_only +``` + +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| audit_only | | +| block | | + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +##### 默认降级动作 {#inputs.proc.ai_agent.enforcement.default_fallback} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.default_fallback` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + default_fallback: sigkill +``` + +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| sigkill | | + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +##### 最大规则数 {#inputs.proc.ai_agent.enforcement.max_rules} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.max_rules` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + max_rules: 256 +``` + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | int | +| Range | [0, 1024] | + +##### 规则 {#inputs.proc.ai_agent.enforcement.rules} + 
+**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.rules` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + rules: [] +``` + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | dict | + +**详细描述**: + +AI Agent 命令执行阻断规则。第一版支持 exec 命令 exact/prefix/suffix 匹配。 + ### 符号表 {#inputs.proc.symbol_table} #### Golang 特有 {#inputs.proc.symbol_table.golang_specific} @@ -11698,4 +11843,3 @@ dev: **详细描述**: 未发布的采集器特性可以通过该选项开启。 - diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 7a623bdea8a..a13d363faf5 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -2391,6 +2391,151 @@ inputs: Whether to enable AI Agent file IO event collection. +#### Enforcement {#inputs.proc.ai_agent.enforcement} + +AI Agent command execution enforcement. The first implementation only supports exec command audit/block. + +##### Enabled {#inputs.proc.ai_agent.enforcement.enabled} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.enabled` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: false +``` + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | bool | + +##### Mode {#inputs.proc.ai_agent.enforcement.mode} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.mode` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + mode: audit_only +``` + +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| audit_only | | +| block | | + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +##### Default Fallback {#inputs.proc.ai_agent.enforcement.default_fallback} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.default_fallback` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + 
default_fallback: sigkill +``` + +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| sigkill | | + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +##### Max Rules {#inputs.proc.ai_agent.enforcement.max_rules} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.max_rules` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + max_rules: 256 +``` + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | int | +| Range | [0, 1024] | + +##### Rules {#inputs.proc.ai_agent.enforcement.rules} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.rules` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + rules: [] +``` + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | dict | + +**Description**: + +AI Agent command enforcement rules. The first implementation supports exec command exact/prefix/suffix matching. + ### Symbol Table {#inputs.proc.symbol_table} #### Golang-specific {#inputs.proc.symbol_table.golang_specific} @@ -11970,4 +12115,3 @@ dev: **Description**: Unreleased deepflow-agent features can be turned on by setting this switch. - diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index 3dc309bfa85..f94141ae1ef 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1724,6 +1724,69 @@ inputs: # ch: |- # 是否开启 AI Agent 文件 IO 事件采集。 file_io_enabled: true + # type: section + # name: + # en: Enforcement + # ch: 执行阻断 + # description: + # en: AI Agent command execution enforcement. The first implementation only supports exec command audit/block. 
+ # ch: AI Agent 命令执行阻断。第一版仅支持 exec 命令审计/阻断。 + enforcement: + # type: bool + # name: + # en: Enabled + # ch: 开启执行阻断 + # unit: + # range: [] + # enum_options: [] + # modification: hot_update + # ee_feature: true + enabled: false + # type: string + # name: + # en: Mode + # ch: 模式 + # unit: + # range: [] + # enum_options: [audit_only, block] + # modification: hot_update + # ee_feature: true + mode: audit_only + # type: string + # name: + # en: Default Fallback + # ch: 默认降级动作 + # unit: + # range: [] + # enum_options: [sigkill] + # modification: hot_update + # ee_feature: true + default_fallback: sigkill + # type: int + # name: + # en: Max Rules + # ch: 最大规则数 + # unit: + # range: [0, 1024] + # enum_options: [] + # modification: hot_update + # ee_feature: true + max_rules: 256 + # type: dict + # name: + # en: Rules + # ch: 规则 + # unit: + # range: [] + # enum_options: [] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # AI Agent command enforcement rules. The first implementation supports exec command exact/prefix/suffix matching. 
+ # ch: |- + # AI Agent 命令执行阻断规则。第一版支持 exec 命令 exact/prefix/suffix 匹配。 + rules: [] # type: section # name: # en: Symbol Table From 5529446e1e44cb1d502e69fe56f7ba72296f911f Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 14:32:01 +0800 Subject: [PATCH 02/24] feat(agent): add ai agent enforcement strategies --- agent/src/config/config.rs | 11 +++ agent/src/config/handler.rs | 9 +++ server/agent_config/README-CH.md | 113 ++++++++++++++++++++++++++++++ server/agent_config/README.md | 113 ++++++++++++++++++++++++++++++ server/agent_config/template.yaml | 45 ++++++++++++ 5 files changed, 291 insertions(+) diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 76f49642eff..64ca04bfd72 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -630,6 +630,9 @@ impl Default for AiAgentConfig { pub struct AiAgentEnforcementConfig { pub enabled: bool, pub mode: String, + pub strategy: String, + pub syscall_strategy: String, + pub allowed_mechanisms: Vec, pub default_fallback: String, pub max_rules: usize, pub rules: Vec, @@ -640,6 +643,14 @@ impl Default for AiAgentEnforcementConfig { Self { enabled: false, mode: "audit_only".to_string(), + strategy: "auto".to_string(), + syscall_strategy: "auto".to_string(), + allowed_mechanisms: vec![ + "lsm".to_string(), + "kprobe_override".to_string(), + "sigkill".to_string(), + "seccomp".to_string(), + ], default_fallback: "sigkill".to_string(), max_rules: 256, rules: Vec::new(), diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index 83f3d2c828b..ef43f8ecaf1 100644 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -6186,6 +6186,15 @@ mod tests { let config = LogParserConfig::default(); assert!(!config.ai_agent_enforcement.enabled); assert_eq!(config.ai_agent_enforcement.mode.as_str(), "audit_only"); + assert_eq!(config.ai_agent_enforcement.strategy.as_str(), "auto"); + assert_eq!( + config.ai_agent_enforcement.syscall_strategy.as_str(), + 
"auto" + ); + assert_eq!( + config.ai_agent_enforcement.allowed_mechanisms, + vec!["lsm", "kprobe_override", "sigkill", "seccomp"] + ); assert!(config.ai_agent_enforcement.rules.is_empty()); } diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index e87288cacc9..5c96e84c8da 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -2420,6 +2420,119 @@ inputs: | ---- | ---------------------------- | | Type | string | +##### Exec 阻断策略 {#inputs.proc.ai_agent.enforcement.strategy} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.strategy` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + strategy: auto +``` + +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| auto | | +| lsm_only | | +| override_only | | +| sigkill_only | | +| audit_only | | + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +**详细描述**: + +exec 命令阻断的机制选择。第一版在可用时使用 BPF LSM;kprobe override 预留给后续 syscall 阻断,并且仅在能力探测通过时使用。 + +##### Syscall 阻断策略 {#inputs.proc.ai_agent.enforcement.syscall_strategy} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.syscall_strategy` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + syscall_strategy: auto +``` + +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| auto | | +| lsm_only | | +| override_only | | +| sigkill_only | | +| audit_only | | + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +**详细描述**: + +预留给后续直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 + +##### 允许的阻断机制 {#inputs.proc.ai_agent.enforcement.allowed_mechanisms} + +**标签**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.allowed_mechanisms` + +**默认值**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + allowed_mechanisms: [lsm, 
kprobe_override, sigkill, seccomp] +``` + +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| lsm | | +| kprobe_override | | +| sigkill | | +| seccomp | | + +**模式**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +**详细描述**: + +配置允许使用的阻断机制。只有列表包含 kprobe_override 且运行时能力探测确认支持时,才会尝试使用 bpf_override_return。 + ##### 默认降级动作 {#inputs.proc.ai_agent.enforcement.default_fallback} **标签**: diff --git a/server/agent_config/README.md b/server/agent_config/README.md index a13d363faf5..6634b21c566 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -2451,6 +2451,119 @@ inputs: | ---- | ---------------------------- | | Type | string | +##### Strategy {#inputs.proc.ai_agent.enforcement.strategy} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.strategy` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + strategy: auto +``` + +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| auto | | +| lsm_only | | +| override_only | | +| sigkill_only | | +| audit_only | | + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +**Description**: + +Enforcement mechanism selection for exec command blocking. The first implementation uses BPF LSM when available; kprobe override is reserved for future syscall blocking when capability probing succeeds. 
+ +##### Syscall Strategy {#inputs.proc.ai_agent.enforcement.syscall_strategy} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.syscall_strategy` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + syscall_strategy: auto +``` + +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| auto | | +| lsm_only | | +| override_only | | +| sigkill_only | | +| audit_only | | + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +**Description**: + +Reserved mechanism selection for future direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. + +##### Allowed Mechanisms {#inputs.proc.ai_agent.enforcement.allowed_mechanisms} + +**Tags**: + +`hot_update` +ee_feature + +**FQCN**: + +`inputs.proc.ai_agent.enforcement.allowed_mechanisms` + +**Default value**: +```yaml +inputs: + proc: + ai_agent: + enforcement: + allowed_mechanisms: [lsm, kprobe_override, sigkill, seccomp] +``` + +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| lsm | | +| kprobe_override | | +| sigkill | | +| seccomp | | + +**Schema**: +| Key | Value | +| ---- | ---------------------------- | +| Type | string | + +**Description**: + +Mechanisms allowed by configuration. kprobe_override is only attempted when this list contains it and runtime capability probing confirms support. 
+ ##### Default Fallback {#inputs.proc.ai_agent.enforcement.default_fallback} **Tags**: diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index f94141ae1ef..4159656ade8 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1754,6 +1754,51 @@ inputs: mode: audit_only # type: string # name: + # en: Strategy + # ch: Exec 阻断策略 + # unit: + # range: [] + # enum_options: [auto, lsm_only, override_only, sigkill_only, audit_only] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # Enforcement mechanism selection for exec command blocking. The first implementation uses BPF LSM when available; kprobe override is reserved for future syscall blocking when capability probing succeeds. + # ch: |- + # exec 命令阻断的机制选择。第一版在可用时使用 BPF LSM;kprobe override 预留给后续 syscall 阻断,并且仅在能力探测通过时使用。 + strategy: auto + # type: string + # name: + # en: Syscall Strategy + # ch: Syscall 阻断策略 + # unit: + # range: [] + # enum_options: [auto, lsm_only, override_only, sigkill_only, audit_only] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # Reserved mechanism selection for future direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. + # ch: |- + # 预留给后续直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 + syscall_strategy: auto + # type: string + # name: + # en: Allowed Mechanisms + # ch: 允许的阻断机制 + # unit: + # range: [] + # enum_options: [lsm, kprobe_override, sigkill, seccomp] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # Mechanisms allowed by configuration. kprobe_override is only attempted when this list contains it and runtime capability probing confirms support. 
+ # ch: |- + # 配置允许使用的阻断机制。只有列表包含 kprobe_override 且运行时能力探测确认支持时,才会尝试使用 bpf_override_return。 + allowed_mechanisms: [lsm, kprobe_override, sigkill, seccomp] + # type: string + # name: # en: Default Fallback # ch: 默认降级动作 # unit: From fcd379841181cf53a1b1c85740b3795a3d4c88aa Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 15:50:39 +0800 Subject: [PATCH 03/24] feat(agent): add proc block event model --- agent/src/common/ebpf.rs | 1 + agent/src/common/proc_event/linux.rs | 274 ++++++++++++++++++++++++- agent/src/ebpf/kernel/include/common.h | 1 + agent/src/ebpf/mod.rs | 2 + message/metric.proto | 36 ++++ 5 files changed, 311 insertions(+), 3 deletions(-) diff --git a/agent/src/common/ebpf.rs b/agent/src/common/ebpf.rs index 06ede88e067..28083133118 100644 --- a/agent/src/common/ebpf.rs +++ b/agent/src/common/ebpf.rs @@ -44,6 +44,7 @@ pub const UNIX_SOCKET: u8 = 8; pub const FILE_OP_EVENT: u8 = 9; pub const PERM_OP_EVENT: u8 = 10; pub const PROC_LIFECYCLE_EVENT: u8 = 11; +pub const PROC_BLOCK_EVENT: u8 = 12; const EBPF_TYPE_TRACEPOINT: u8 = 0; const EBPF_TYPE_TLS_UPROBE: u8 = 1; diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index 2fd273a3a0b..7cc366e5bda 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -25,13 +25,13 @@ use std::{ use prost::Message; use public::{ - bytes::{read_u16_le, read_u32_le, read_u64_le}, + bytes::{read_i32_le, read_u16_le, read_u32_le, read_u64_le}, proto::metric, sender::{SendMessageType, Sendable}, }; use crate::common::{ - ebpf::{FILE_OP_EVENT, IO_EVENT, PERM_OP_EVENT, PROC_LIFECYCLE_EVENT}, + ebpf::{FILE_OP_EVENT, IO_EVENT, PERM_OP_EVENT, PROC_BLOCK_EVENT, PROC_LIFECYCLE_EVENT}, error::Error::{self, ParseEventData}, }; use crate::ebpf::SK_BPF_DATA; @@ -50,6 +50,13 @@ const IO_MNTNS_ID_OFFSET: usize = 1568; const IO_ACCESS_PERMISSION_OFFSET: usize = 1572; const IO_EVENT_BUFF_SIZE: usize = 1574; +fn parse_cstring_slice(slice: &[u8]) -> Vec<u8> { 
+ match slice.iter().position(|&b| b == b'\0') { + Some(index) => slice[..index].to_vec(), + None => slice.to_vec(), + } +} + struct IoEventData { bytes_count: u32, // Number of bytes read and written operation: u32, // 0: write 1: read @@ -358,6 +365,157 @@ impl From<ProcLifecycleEventData> for metric::ProcLifecycleEventData { } } +// ── ProcBlockEventData offsets (packed __ai_agent_proc_block_event) ───── +// Layout: +// target_type(1) + action(1) + mechanism(1) + guarantee(1) + errno(4) +// + pid(4) + parent_pid(4) + ai_agent_root_pid(4) + uid(4) + gid(4) +// + syscall_id(4) + timestamp(8) + policy_epoch(8) + rule_id(64) +// + comm(16) + cmdline(256) + exec_path(256) + syscall_name(32) +const PROC_BLOCK_TARGET_TYPE_OFF: usize = 0; +const PROC_BLOCK_ACTION_OFF: usize = 1; +const PROC_BLOCK_MECHANISM_OFF: usize = 2; +const PROC_BLOCK_GUARANTEE_OFF: usize = 3; +const PROC_BLOCK_ERRNO_OFF: usize = 4; +const PROC_BLOCK_PID_OFF: usize = 8; +const PROC_BLOCK_PPID_OFF: usize = 12; +const PROC_BLOCK_ROOT_PID_OFF: usize = 16; +const PROC_BLOCK_UID_OFF: usize = 20; +const PROC_BLOCK_GID_OFF: usize = 24; +const PROC_BLOCK_SYSCALL_ID_OFF: usize = 28; +const PROC_BLOCK_TS_OFF: usize = 32; +const PROC_BLOCK_POLICY_EPOCH_OFF: usize = 40; +const PROC_BLOCK_RULE_ID_OFF: usize = 48; +const PROC_BLOCK_COMM_OFF: usize = 112; +const PROC_BLOCK_CMDLINE_OFF: usize = 128; +const PROC_BLOCK_EXEC_PATH_OFF: usize = 384; +const PROC_BLOCK_SYSCALL_NAME_OFF: usize = 640; +const PROC_BLOCK_EVENT_SIZE: usize = 672; + +struct ProcBlockEventData { + rule_id: String, + target_type: u8, + action: u8, + mechanism: String, + guarantee: String, + errno: i32, + pid: u32, + parent_pid: u32, + ai_agent_root_pid: u32, + uid: u32, + gid: u32, + comm: Vec<u8>, + cmdline: Vec<u8>, + exec_path: Vec<u8>, + syscall_name: String, + syscall_id: u32, + timestamp: u64, + policy_epoch: u64, +} + +impl TryFrom<&[u8]> for ProcBlockEventData { + type Error = Error; + + fn try_from(raw: &[u8]) -> Result<Self, Self::Error> { + if raw.len() < PROC_BLOCK_EVENT_SIZE { + return 
Err(ParseEventData(format!( + "proc_block event too short: {} < {PROC_BLOCK_EVENT_SIZE}", + raw.len() + ))); + } + + Ok(Self { + rule_id: String::from_utf8_lossy(&parse_cstring_slice( + &raw[PROC_BLOCK_RULE_ID_OFF..PROC_BLOCK_COMM_OFF], + )) + .into_owned(), + target_type: raw[PROC_BLOCK_TARGET_TYPE_OFF], + action: raw[PROC_BLOCK_ACTION_OFF], + mechanism: enforcement_mechanism_name(raw[PROC_BLOCK_MECHANISM_OFF]).to_string(), + guarantee: enforcement_guarantee_name(raw[PROC_BLOCK_GUARANTEE_OFF]).to_string(), + errno: read_i32_le(&raw[PROC_BLOCK_ERRNO_OFF..]), + pid: read_u32_le(&raw[PROC_BLOCK_PID_OFF..]), + parent_pid: read_u32_le(&raw[PROC_BLOCK_PPID_OFF..]), + ai_agent_root_pid: read_u32_le(&raw[PROC_BLOCK_ROOT_PID_OFF..]), + uid: read_u32_le(&raw[PROC_BLOCK_UID_OFF..]), + gid: read_u32_le(&raw[PROC_BLOCK_GID_OFF..]), + comm: parse_cstring_slice(&raw[PROC_BLOCK_COMM_OFF..PROC_BLOCK_CMDLINE_OFF]), + cmdline: parse_cstring_slice(&raw[PROC_BLOCK_CMDLINE_OFF..PROC_BLOCK_EXEC_PATH_OFF]), + exec_path: parse_cstring_slice( + &raw[PROC_BLOCK_EXEC_PATH_OFF..PROC_BLOCK_SYSCALL_NAME_OFF], + ), + syscall_name: String::from_utf8_lossy(&parse_cstring_slice( + &raw[PROC_BLOCK_SYSCALL_NAME_OFF..PROC_BLOCK_EVENT_SIZE], + )) + .into_owned(), + syscall_id: read_u32_le(&raw[PROC_BLOCK_SYSCALL_ID_OFF..]), + timestamp: read_u64_le(&raw[PROC_BLOCK_TS_OFF..]), + policy_epoch: read_u64_le(&raw[PROC_BLOCK_POLICY_EPOCH_OFF..]), + }) + } +} + +impl From<ProcBlockEventData> for metric::ProcBlockEventData { + fn from(d: ProcBlockEventData) -> Self { + Self { + rule_id: d.rule_id, + target_type: enforcement_target_type(d.target_type) as i32, + action: enforcement_action(d.action) as i32, + mechanism: d.mechanism, + guarantee: d.guarantee, + errno: d.errno, + pid: d.pid, + parent_pid: d.parent_pid, + ai_agent_root_pid: d.ai_agent_root_pid, + uid: d.uid, + gid: d.gid, + comm: d.comm, + cmdline: d.cmdline, + exec_path: d.exec_path, + syscall_name: d.syscall_name, + syscall_id: d.syscall_id, + timestamp: d.timestamp, 
+ policy_epoch: d.policy_epoch, + } + } +} + +fn enforcement_target_type(code: u8) -> metric::EnforcementTargetType { + match code { + 1 => metric::EnforcementTargetType::EnforcementTargetExec, + 2 => metric::EnforcementTargetType::EnforcementTargetSyscall, + _ => metric::EnforcementTargetType::EnforcementTargetUnknown, + } +} + +fn enforcement_action(code: u8) -> metric::EnforcementAction { + match code { + 1 => metric::EnforcementAction::Audit, + 2 => metric::EnforcementAction::Deny, + 3 => metric::EnforcementAction::Sigkill, + _ => metric::EnforcementAction::Unknown, + } +} + +fn enforcement_mechanism_name(code: u8) -> &'static str { + match code { + 1 => "lsm", + 2 => "kprobe_override", + 3 => "sigkill", + 4 => "seccomp", + 5 => "user_space_audit", + _ => "unknown", + } +} + +fn enforcement_guarantee_name(code: u8) -> &'static str { + match code { + 1 => "prevented", + 2 => "best_effort", + 3 => "audit_only", + _ => "unknown", + } +} + // ── EventData ────────────────────────────────────────────────────────── enum EventData { OtherEvent, @@ -365,6 +523,7 @@ enum EventData { FileOpEvent(FileOpEventData), PermOpEvent(PermOpEventData), ProcLifecycleEvent(ProcLifecycleEventData), + ProcBlockEvent(ProcBlockEventData), } impl Debug for EventData { @@ -392,6 +551,14 @@ impl Debug for EventData { "ProcLifecycleEventData {{ type: {}, pid: {}, parent_pid: {} }}", d.lifecycle_type, d.pid, d.parent_pid )), + EventData::ProcBlockEvent(d) => f.write_fmt(format_args!( + "ProcBlockEventData {{ rule_id: {}, action: {}, mechanism: {}, pid: {}, exec_path: {} }}", + d.rule_id, + d.action, + d.mechanism, + d.pid, + str::from_utf8(&d.exec_path).unwrap_or("") + )), _ => f.write_str("other event"), } } @@ -405,6 +572,7 @@ pub enum EventType { FileOpEvent = 2, PermOpEvent = 3, ProcLifecycleEvent = 4, + ProcBlockEvent = 5, } impl From for EventType { @@ -414,6 +582,7 @@ impl From for EventType { FILE_OP_EVENT => Self::FileOpEvent, PERM_OP_EVENT => Self::PermOpEvent, PROC_LIFECYCLE_EVENT 
=> Self::ProcLifecycleEvent, + PROC_BLOCK_EVENT => Self::ProcBlockEvent, _ => Self::OtherEvent, } } @@ -433,6 +602,7 @@ impl fmt::Display for EventType { Self::FileOpEvent => write!(f, "file_op_event"), Self::PermOpEvent => write!(f, "perm_op_event"), Self::ProcLifecycleEvent => write!(f, "proc_lifecycle_event"), + Self::ProcBlockEvent => write!(f, "proc_block_event"), } } } @@ -461,6 +631,7 @@ impl ProcEvent { let mut event_data: EventData = EventData::OtherEvent; let start_time = data.timestamp; // The unit of start_time is nanosecond let mut end_time = 0; + let mut ai_agent_root_pid = 0; match event_type { EventType::IoEvent => { let io_event_data = IoEventData::try_from(raw_data)?; @@ -482,6 +653,12 @@ impl ProcEvent { end_time = start_time; event_data = EventData::ProcLifecycleEvent(d); } + EventType::ProcBlockEvent => { + let d = ProcBlockEventData::try_from(raw_data)?; + end_time = start_time; + ai_agent_root_pid = d.ai_agent_root_pid; + event_data = EventData::ProcBlockEvent(d); + } _ => {} } @@ -501,7 +678,7 @@ impl ProcEvent { event_type, event_data, pod_id: 0, - ai_agent_root_pid: 0, + ai_agent_root_pid, }; Ok(BoxedProcEvents(Box::new(proc_event))) @@ -562,6 +739,9 @@ impl Sendable for BoxedProcEvents { EventData::ProcLifecycleEvent(d) => { pb_proc_event.proc_lifecycle_event_data = Some(d.into()); } + EventData::ProcBlockEvent(d) => { + pb_proc_event.proc_block_event_data = Some(d.into()); + } _ => {} } pb_proc_event @@ -651,4 +831,92 @@ mod tests { assert_eq!(path_to_bytes(&path), raw); } + + #[test] + fn test_proc_block_event_into_metric_carries_rule_and_command() { + let raw = make_proc_block_raw( + 1, + 2, + 1, + 1, + 1, + 13, + 100, + 10, + 100, + 1000, + 1000, + 42, + b"block-reboot", + b"reboot now", + b"/sbin/reboot", + b"lsm", + ); + let event = ProcBlockEventData::try_from(raw.as_slice()).unwrap(); + let pb: metric::ProcBlockEventData = event.into(); + assert_eq!(pb.rule_id, "block-reboot"); + assert_eq!(pb.exec_path, b"/sbin/reboot"); + 
assert_eq!(pb.cmdline, b"reboot now"); + assert_eq!(pb.mechanism, "lsm"); + } + + fn make_proc_block_raw( + target_type: u8, + action: u8, + mechanism: u8, + guarantee: u8, + errno: i32, + pid: u32, + parent_pid: u32, + ai_agent_root_pid: u32, + uid: u32, + gid: u32, + syscall_id: u32, + timestamp: u64, + rule_id: &[u8], + cmdline: &[u8], + exec_path: &[u8], + syscall_name: &[u8], + ) -> Vec<u8> { + let mut raw = vec![0; PROC_BLOCK_EVENT_SIZE]; + raw[PROC_BLOCK_TARGET_TYPE_OFF] = target_type; + raw[PROC_BLOCK_ACTION_OFF] = action; + raw[PROC_BLOCK_MECHANISM_OFF] = mechanism; + raw[PROC_BLOCK_GUARANTEE_OFF] = guarantee; + raw[PROC_BLOCK_ERRNO_OFF..PROC_BLOCK_ERRNO_OFF + 4].copy_from_slice(&errno.to_le_bytes()); + raw[PROC_BLOCK_PID_OFF..PROC_BLOCK_PID_OFF + 4].copy_from_slice(&pid.to_le_bytes()); + raw[PROC_BLOCK_PPID_OFF..PROC_BLOCK_PPID_OFF + 4] + .copy_from_slice(&parent_pid.to_le_bytes()); + raw[PROC_BLOCK_ROOT_PID_OFF..PROC_BLOCK_ROOT_PID_OFF + 4] + .copy_from_slice(&ai_agent_root_pid.to_le_bytes()); + raw[PROC_BLOCK_UID_OFF..PROC_BLOCK_UID_OFF + 4].copy_from_slice(&uid.to_le_bytes()); + raw[PROC_BLOCK_GID_OFF..PROC_BLOCK_GID_OFF + 4].copy_from_slice(&gid.to_le_bytes()); + raw[PROC_BLOCK_SYSCALL_ID_OFF..PROC_BLOCK_SYSCALL_ID_OFF + 4] + .copy_from_slice(&syscall_id.to_le_bytes()); + raw[PROC_BLOCK_TS_OFF..PROC_BLOCK_TS_OFF + 8].copy_from_slice(&timestamp.to_le_bytes()); + raw[PROC_BLOCK_POLICY_EPOCH_OFF..PROC_BLOCK_POLICY_EPOCH_OFF + 8] + .copy_from_slice(&42_u64.to_le_bytes()); + write_cstr( + &mut raw[PROC_BLOCK_RULE_ID_OFF..PROC_BLOCK_RULE_ID_OFF + 64], + rule_id, + ); + write_cstr( + &mut raw[PROC_BLOCK_CMDLINE_OFF..PROC_BLOCK_CMDLINE_OFF + 256], + cmdline, + ); + write_cstr( + &mut raw[PROC_BLOCK_EXEC_PATH_OFF..PROC_BLOCK_EXEC_PATH_OFF + 256], + exec_path, + ); + write_cstr( + &mut raw[PROC_BLOCK_SYSCALL_NAME_OFF..PROC_BLOCK_SYSCALL_NAME_OFF + 32], + syscall_name, + ); + raw + } + + fn write_cstr(dst: &mut [u8], src: &[u8]) { + let len = 
src.len().min(dst.len().saturating_sub(1)); + dst[..len].copy_from_slice(&src[..len]); + } } diff --git a/agent/src/ebpf/kernel/include/common.h b/agent/src/ebpf/kernel/include/common.h index 625ffd254c3..5c5f03d8e44 100644 --- a/agent/src/ebpf/kernel/include/common.h +++ b/agent/src/ebpf/kernel/include/common.h @@ -119,6 +119,7 @@ enum process_data_extra_source { DATA_SOURCE_FILE_OP_EVENT, DATA_SOURCE_PERM_OP_EVENT, DATA_SOURCE_PROC_LIFECYCLE_EVENT, + DATA_SOURCE_PROC_BLOCK_EVENT, }; struct protocol_message_t { diff --git a/agent/src/ebpf/mod.rs b/agent/src/ebpf/mod.rs index 6bd6eb1a5b3..8f15ad1f434 100644 --- a/agent/src/ebpf/mod.rs +++ b/agent/src/ebpf/mod.rs @@ -169,6 +169,8 @@ pub const DATA_SOURCE_PERM_OP_EVENT: u8 = 10; #[allow(dead_code)] pub const DATA_SOURCE_PROC_LIFECYCLE_EVENT: u8 = 11; #[allow(dead_code)] +pub const DATA_SOURCE_PROC_BLOCK_EVENT: u8 = 12; +#[allow(dead_code)] pub const DATA_SOURCE_GO_HTTP2_DATAFRAME_UPROBE: u8 = 5; #[allow(dead_code)] pub const DATA_SOURCE_UNIX_SOCKET: u8 = 8; diff --git a/message/metric.proto b/message/metric.proto index 0c39f671033..bbfe966d235 100644 --- a/message/metric.proto +++ b/message/metric.proto @@ -266,6 +266,7 @@ enum EventType { FileOpEvent = 2; // File creation/deletion/chmod/chown PermOpEvent = 3; // setuid/setgid/setreuid/setregid ProcLifecycleEvent = 4; // fork/exec/exit + ProcBlockEvent = 5; // AI Agent command/syscall enforcement audit/block } enum FileOpType { @@ -328,6 +329,40 @@ message ProcLifecycleEventData { bytes exec_path = 9; } +enum EnforcementTargetType { + EnforcementTargetUnknown = 0; + EnforcementTargetExec = 1; + EnforcementTargetSyscall = 2; +} + +enum EnforcementAction { + EnforcementActionUnknown = 0; + EnforcementActionAudit = 1; + EnforcementActionDeny = 2; + EnforcementActionSigkill = 3; +} + +message ProcBlockEventData { + string rule_id = 1; + EnforcementTargetType target_type = 2; + EnforcementAction action = 3; + string mechanism = 4; + string guarantee = 5; + int32 errno = 
6; + uint32 pid = 7; + uint32 parent_pid = 8; + uint32 ai_agent_root_pid = 9; + uint32 uid = 10; + uint32 gid = 11; + bytes comm = 12; + bytes cmdline = 13; + bytes exec_path = 14; + string syscall_name = 15; + uint32 syscall_id = 16; + uint64 timestamp = 17; + uint64 policy_epoch = 18; +} + message ProcEvent { uint32 pid = 1; uint32 thread_id = 2; @@ -343,6 +378,7 @@ message ProcEvent { PermOpEventData perm_op_event_data = 12; ProcLifecycleEventData proc_lifecycle_event_data = 13; uint32 ai_agent_root_pid = 14; + ProcBlockEventData proc_block_event_data = 15; } message PrometheusMetric { From 00ef425185c49217bca7bc71a0ccfcd898e56be4 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 17:17:06 +0800 Subject: [PATCH 04/24] feat(server): route ai agent block events --- server/ingester/datasource/handle.go | 82 ++++++------- server/ingester/datasource/handle_test.go | 6 + server/ingester/event/common/common.go | 7 ++ server/ingester/event/common/common_test.go | 1 + server/ingester/event/dbwriter/event.go | 8 +- .../event/dbwriter/event_column_block.go | 2 + server/ingester/event/dbwriter/event_test.go | 6 + .../ingester/event/dbwriter/event_writer.go | 2 +- .../event/dbwriter/proc_block_event.go | 87 ++++++++++++++ .../dbwriter/proc_block_event_column_block.go | 108 ++++++++++++++++++ .../event/dbwriter/proc_block_event_test.go | 19 +++ server/ingester/event/decoder/decoder.go | 86 +++++++++++++- server/ingester/event/decoder/decoder_test.go | 10 ++ server/ingester/event/event/event.go | 15 ++- server/ingester/exporters/config/config.go | 5 +- server/libs/nativetag/nativetag.go | 3 + 16 files changed, 394 insertions(+), 53 deletions(-) create mode 100644 server/ingester/event/dbwriter/proc_block_event.go create mode 100644 server/ingester/event/dbwriter/proc_block_event_column_block.go create mode 100644 server/ingester/event/dbwriter/proc_block_event_test.go diff --git a/server/ingester/datasource/handle.go b/server/ingester/datasource/handle.go index 
fda849798b8..7851933e720 100644 --- a/server/ingester/datasource/handle.go +++ b/server/ingester/datasource/handle.go @@ -47,51 +47,53 @@ type DatasourceInfo struct { } const ( - DEEPFLOW_SYSTEM DatasourceModifiedOnly = "deepflow_system" - L4_FLOW_LOG = "flow_log.l4_flow_log" - L7_FLOW_LOG = "flow_log.l7_flow_log" - L4_PACKET = "flow_log.l4_packet" - L7_PACKET = "flow_log.l7_packet" - EXT_METRICS = "ext_metrics" - PROMETHEUS = "prometheus" - EVENT_EVENT = "event.event" - EVENT_FILE_EVENT = "event.file_event" - EVENT_FILE_AGG_EVENT = "event.file_agg_event" - EVENT_FILE_MGMT_EVENT = "event.file_mgmt_event" - EVENT_PROC_PERM_EVENT = "event.proc_perm_event" - EVENT_PROC_OPS_EVENT = "event.proc_ops_event" - EVENT_ALERT_EVENT = "event.alert_event" - PROFILE = "profile.in_process" - APPLOG = "application_log.log" - DEEPFLOW_TENANT = "deepflow_tenant" - DEEPFLOW_ADMIN = "deepflow_admin" - PROFILE_METRICS = "profile.in_process_metrics" - FILE_EVNET_METRICS = "event.file_event_metrics" + DEEPFLOW_SYSTEM DatasourceModifiedOnly = "deepflow_system" + L4_FLOW_LOG = "flow_log.l4_flow_log" + L7_FLOW_LOG = "flow_log.l7_flow_log" + L4_PACKET = "flow_log.l4_packet" + L7_PACKET = "flow_log.l7_packet" + EXT_METRICS = "ext_metrics" + PROMETHEUS = "prometheus" + EVENT_EVENT = "event.event" + EVENT_FILE_EVENT = "event.file_event" + EVENT_FILE_AGG_EVENT = "event.file_agg_event" + EVENT_FILE_MGMT_EVENT = "event.file_mgmt_event" + EVENT_PROC_PERM_EVENT = "event.proc_perm_event" + EVENT_PROC_OPS_EVENT = "event.proc_ops_event" + EVENT_PROC_BLOCK_EVENT = "event.proc_block_event" + EVENT_ALERT_EVENT = "event.alert_event" + PROFILE = "profile.in_process" + APPLOG = "application_log.log" + DEEPFLOW_TENANT = "deepflow_tenant" + DEEPFLOW_ADMIN = "deepflow_admin" + PROFILE_METRICS = "profile.in_process_metrics" + FILE_EVNET_METRICS = "event.file_event_metrics" ) // to modify the datasource TTL, you need to also modify the 'flow_tag' database tables. 
// FIXME: only the 'prometheus' database is supported now, and the remaining databases will be completed in the future. var DatasourceModifiedOnlyIDMap = map[DatasourceModifiedOnly]DatasourceInfo{ - DEEPFLOW_SYSTEM: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 1, "deepflow_system", []string{"deepflow_system"}, []string{}}, - L4_FLOW_LOG: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 2, "flow_log", []string{"l4_flow_log"}, []string{}}, - L7_FLOW_LOG: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 3, "flow_log", []string{"l7_flow_log"}, []string{}}, - L4_PACKET: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 4, "flow_log", []string{"l4_packet"}, []string{}}, - L7_PACKET: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 5, "flow_log", []string{"l7_packet"}, []string{}}, - EXT_METRICS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 6, "ext_metrics", []string{"metrics"}, []string{}}, - PROMETHEUS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 7, "prometheus", []string{"samples"}, []string{"prometheus_custom_field", "prometheus_custom_field_value"}}, - EVENT_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 8, "event", []string{"event"}, []string{}}, - EVENT_FILE_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 9, "event", []string{"file_event"}, []string{}}, - EVENT_FILE_AGG_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 10, "event", []string{"file_agg_event"}, []string{}}, - EVENT_FILE_MGMT_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 11, "event", []string{"file_mgmt_event"}, []string{}}, - EVENT_PROC_PERM_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 12, "event", []string{"proc_perm_event"}, []string{}}, - EVENT_PROC_OPS_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 13, "event", []string{"proc_ops_event"}, []string{}}, - EVENT_ALERT_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 14, "event", []string{"alert_event", "alert_record"}, []string{}}, - PROFILE: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 15, "profile", []string{"in_process"}, []string{}}, - APPLOG: 
{int(flow_metrics.METRICS_TABLE_ID_MAX) + 16, "application_log", []string{"log"}, []string{}}, - DEEPFLOW_TENANT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 17, "deepflow_tenant", []string{"deepflow_collector"}, []string{}}, - DEEPFLOW_ADMIN: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 18, "deepflow_admin", []string{"deepflow_server"}, []string{}}, - PROFILE_METRICS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 19, "profile", []string{"in_process_metrics.1s_agg"}, []string{}}, - FILE_EVNET_METRICS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 20, "event", []string{"file_event_metrics.1s_agg"}, []string{}}, + DEEPFLOW_SYSTEM: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 1, "deepflow_system", []string{"deepflow_system"}, []string{}}, + L4_FLOW_LOG: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 2, "flow_log", []string{"l4_flow_log"}, []string{}}, + L7_FLOW_LOG: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 3, "flow_log", []string{"l7_flow_log"}, []string{}}, + L4_PACKET: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 4, "flow_log", []string{"l4_packet"}, []string{}}, + L7_PACKET: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 5, "flow_log", []string{"l7_packet"}, []string{}}, + EXT_METRICS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 6, "ext_metrics", []string{"metrics"}, []string{}}, + PROMETHEUS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 7, "prometheus", []string{"samples"}, []string{"prometheus_custom_field", "prometheus_custom_field_value"}}, + EVENT_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 8, "event", []string{"event"}, []string{}}, + EVENT_FILE_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 9, "event", []string{"file_event"}, []string{}}, + EVENT_FILE_AGG_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 10, "event", []string{"file_agg_event"}, []string{}}, + EVENT_FILE_MGMT_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 11, "event", []string{"file_mgmt_event"}, []string{}}, + EVENT_PROC_PERM_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 12, "event", []string{"proc_perm_event"}, 
[]string{}}, + EVENT_PROC_OPS_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 13, "event", []string{"proc_ops_event"}, []string{}}, + EVENT_ALERT_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 14, "event", []string{"alert_event", "alert_record"}, []string{}}, + PROFILE: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 15, "profile", []string{"in_process"}, []string{}}, + APPLOG: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 16, "application_log", []string{"log"}, []string{}}, + DEEPFLOW_TENANT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 17, "deepflow_tenant", []string{"deepflow_collector"}, []string{}}, + DEEPFLOW_ADMIN: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 18, "deepflow_admin", []string{"deepflow_server"}, []string{}}, + PROFILE_METRICS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 19, "profile", []string{"in_process_metrics.1s_agg"}, []string{}}, + FILE_EVNET_METRICS: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 20, "event", []string{"file_event_metrics.1s_agg"}, []string{}}, + EVENT_PROC_BLOCK_EVENT: {int(flow_metrics.METRICS_TABLE_ID_MAX) + 21, "event", []string{"proc_block_event"}, []string{}}, } func (ds DatasourceModifiedOnly) DatasourceInfo() DatasourceInfo { diff --git a/server/ingester/datasource/handle_test.go b/server/ingester/datasource/handle_test.go index 589aa5d5bf7..cd98eda390f 100644 --- a/server/ingester/datasource/handle_test.go +++ b/server/ingester/datasource/handle_test.go @@ -33,6 +33,12 @@ func TestNewAiAgentEventDatasourcesRegistered(t *testing.T) { wantDB: "event", wantTable: "proc_ops_event", }, + { + name: "proc block event", + datasource: EVENT_PROC_BLOCK_EVENT, + wantDB: "event", + wantTable: "proc_block_event", + }, } for _, tt := range tests { diff --git a/server/ingester/event/common/common.go b/server/ingester/event/common/common.go index b6422f7dba7..18ff0cb2e64 100644 --- a/server/ingester/event/common/common.go +++ b/server/ingester/event/common/common.go @@ -32,6 +32,7 @@ const ( FILE_MGMT_EVENT PROC_PERM_EVENT PROC_OPS_EVENT + 
PROC_BLOCK_EVENT ALERT_EVENT K8S_EVENT ALERT_RECORD @@ -51,6 +52,8 @@ func (e EventType) String() string { return "proc_perm_event" case PROC_OPS_EVENT: return "proc_ops_event" + case PROC_BLOCK_EVENT: + return "proc_block_event" case ALERT_EVENT: return "alert_event" case K8S_EVENT: @@ -77,6 +80,8 @@ func (e EventType) TableName() string { return "proc_perm_event" case PROC_OPS_EVENT: return "proc_ops_event" + case PROC_BLOCK_EVENT: + return "proc_block_event" case ALERT_EVENT: return "alert_event" case ALERT_RECORD: @@ -90,6 +95,8 @@ func (e EventType) DataSource() uint32 { switch e { case FILE_EVENT, FILE_AGG_EVENT, FILE_MGMT_EVENT, PROC_PERM_EVENT, PROC_OPS_EVENT: return uint32(exportconfig.FILE_EVENT) + case PROC_BLOCK_EVENT: + return uint32(exportconfig.PROC_BLOCK_EVENT) default: return uint32(exportconfig.MAX_DATASOURCE_ID) } diff --git a/server/ingester/event/common/common_test.go b/server/ingester/event/common/common_test.go index bb855fdf4e7..532c4793b0f 100644 --- a/server/ingester/event/common/common_test.go +++ b/server/ingester/event/common/common_test.go @@ -11,6 +11,7 @@ func TestNewAiAgentEventTypesTableName(t *testing.T) { {FILE_MGMT_EVENT, "file_mgmt_event"}, {PROC_PERM_EVENT, "proc_perm_event"}, {PROC_OPS_EVENT, "proc_ops_event"}, + {PROC_BLOCK_EVENT, "proc_block_event"}, } for _, tt := range tests { diff --git a/server/ingester/event/dbwriter/event.go b/server/ingester/event/dbwriter/event.go index d1c209177be..80e63454b74 100644 --- a/server/ingester/event/dbwriter/event.go +++ b/server/ingester/event/dbwriter/event.go @@ -131,6 +131,8 @@ func (e *EventStore) NativeTagVersion() uint32 { return nativetag.GetTableNativeTagsVersion(e.OrgId, nativetag.EVENT_PROC_PERM_EVENT) case common.PROC_OPS_EVENT: return nativetag.GetTableNativeTagsVersion(e.OrgId, nativetag.EVENT_PROC_OPS_EVENT) + case common.PROC_BLOCK_EVENT: + return nativetag.GetTableNativeTagsVersion(e.OrgId, nativetag.EVENT_PROC_BLOCK_EVENT) default: return 
nativetag.GetTableNativeTagsVersion(e.OrgId, nativetag.EVENT_EVENT) } @@ -160,6 +162,8 @@ func (e *EventStore) DataSource() uint32 { return uint32(config.PROC_PERM_EVENT) case common.PROC_OPS_EVENT: return uint32(config.PROC_OPS_EVENT) + case common.PROC_BLOCK_EVENT: + return uint32(config.PROC_BLOCK_EVENT) default: return uint32(config.MAX_DATASOURCE_ID) } @@ -292,6 +296,8 @@ func GenEventCKTable(cluster, storagePolicy, table, ckdbType string, ttl int, co columns = ProcPermEventColumns() case common.PROC_OPS_EVENT.TableName(): columns = ProcOpsEventColumns() + case common.PROC_BLOCK_EVENT.TableName(): + columns = ProcBlockEventColumns() } if table == common.FILE_EVENT.TableName() || table == common.FILE_MGMT_EVENT.TableName() || table == common.FILE_AGG_EVENT.TableName() { partition = DefaultFileEventPartition @@ -299,7 +305,7 @@ func GenEventCKTable(cluster, storagePolicy, table, ckdbType string, ttl int, co aggr1S := true switch table { - case common.FILE_AGG_EVENT.TableName(), common.FILE_MGMT_EVENT.TableName(), common.PROC_PERM_EVENT.TableName(), common.PROC_OPS_EVENT.TableName(): + case common.FILE_AGG_EVENT.TableName(), common.FILE_MGMT_EVENT.TableName(), common.PROC_PERM_EVENT.TableName(), common.PROC_OPS_EVENT.TableName(), common.PROC_BLOCK_EVENT.TableName(): aggr1S = false } diff --git a/server/ingester/event/dbwriter/event_column_block.go b/server/ingester/event/dbwriter/event_column_block.go index 8dbe3b053f9..f023a690a05 100644 --- a/server/ingester/event/dbwriter/event_column_block.go +++ b/server/ingester/event/dbwriter/event_column_block.go @@ -209,6 +209,8 @@ func (n *EventStore) NewColumnBlock() ckdb.CKColumnBlock { b.NativeTagsBlock = nativetag.GetTableNativeTagsColumnBlock(n.OrgId, nativetag.EVENT_PROC_PERM_EVENT) case common.PROC_OPS_EVENT: b.NativeTagsBlock = nativetag.GetTableNativeTagsColumnBlock(n.OrgId, nativetag.EVENT_PROC_OPS_EVENT) + case common.PROC_BLOCK_EVENT: + b.NativeTagsBlock = nativetag.GetTableNativeTagsColumnBlock(n.OrgId, 
nativetag.EVENT_PROC_BLOCK_EVENT) default: b.NativeTagsBlock = nativetag.GetTableNativeTagsColumnBlock(n.OrgId, nativetag.EVENT_EVENT) } diff --git a/server/ingester/event/dbwriter/event_test.go b/server/ingester/event/dbwriter/event_test.go index d8cd40f939b..eb83895b830 100644 --- a/server/ingester/event/dbwriter/event_test.go +++ b/server/ingester/event/dbwriter/event_test.go @@ -39,6 +39,11 @@ func TestEventStoreDataSourceForNewAiAgentTables(t *testing.T) { store: EventStore{StoreEventType: eventcommon.PROC_OPS_EVENT}, want: uint32(exporterconfig.PROC_OPS_EVENT), }, + { + name: "proc block event", + store: EventStore{StoreEventType: eventcommon.PROC_BLOCK_EVENT}, + want: uint32(exporterconfig.PROC_BLOCK_EVENT), + }, } for _, tt := range tests { @@ -60,6 +65,7 @@ func TestGenEventCKTableDisables1SAggrForNewAiAgentTables(t *testing.T) { {eventcommon.FILE_MGMT_EVENT.TableName(), false}, {eventcommon.PROC_PERM_EVENT.TableName(), false}, {eventcommon.PROC_OPS_EVENT.TableName(), false}, + {eventcommon.PROC_BLOCK_EVENT.TableName(), false}, } for _, tt := range tests { diff --git a/server/ingester/event/dbwriter/event_writer.go b/server/ingester/event/dbwriter/event_writer.go index 840c86988d5..e0f2d1a3e29 100644 --- a/server/ingester/event/dbwriter/event_writer.go +++ b/server/ingester/event/dbwriter/event_writer.go @@ -106,7 +106,7 @@ func NewEventWriter(eventType common.EventType, decoderIndex int, config *config case common.RESOURCE_EVENT: w.ttl = config.EventTTL w.writerConfig = config.CKWriterConfig - case common.FILE_EVENT, common.FILE_AGG_EVENT, common.FILE_MGMT_EVENT, common.PROC_PERM_EVENT, common.PROC_OPS_EVENT: + case common.FILE_EVENT, common.FILE_AGG_EVENT, common.FILE_MGMT_EVENT, common.PROC_PERM_EVENT, common.PROC_OPS_EVENT, common.PROC_BLOCK_EVENT: w.ttl = config.FileEventTTL w.writerConfig = config.FileEventCKWriterConfig case common.K8S_EVENT: diff --git a/server/ingester/event/dbwriter/proc_block_event.go 
b/server/ingester/event/dbwriter/proc_block_event.go new file mode 100644 index 00000000000..b0935be987d --- /dev/null +++ b/server/ingester/event/dbwriter/proc_block_event.go @@ -0,0 +1,87 @@ +package dbwriter + +import ( + "github.com/deepflowio/deepflow/server/ingester/event/common" + "github.com/deepflowio/deepflow/server/libs/ckdb" + "github.com/deepflowio/deepflow/server/libs/pool" +) + +var procBlockEventPool = pool.NewLockFreePool(func() *ProcBlockEventStore { + return &ProcBlockEventStore{ + EventStore: EventStore{ + AttributeNames: []string{}, + AttributeValues: []string{}, + StoreEventType: common.PROC_BLOCK_EVENT, + IsIPv4: true, + }, + } +}) + +type ProcBlockEventStore struct { + EventStore + + RuleID string + TargetType string + Action string + Mechanism string + Guarantee string + Errno int32 + Pid uint32 + ParentPid uint32 + UID uint32 + GID uint32 + Cmdline string + ExecPath string + SyscallName string + SyscallID uint32 + PolicyEpoch uint64 +} + +func AcquireProcBlockEventStore() *ProcBlockEventStore { + e := procBlockEventPool.Get() + e.Reset() + return e +} + +func ReleaseProcBlockEventStore(e *ProcBlockEventStore) { + if e == nil { + return + } + attrNames := e.AttributeNames[:0] + attrValues := e.AttributeValues[:0] + *e = ProcBlockEventStore{} + e.AttributeNames = attrNames + e.AttributeValues = attrValues + e.IsIPv4 = true + e.StoreEventType = common.PROC_BLOCK_EVENT + procBlockEventPool.Put(e) +} + +func (e *ProcBlockEventStore) Release() { + ReleaseProcBlockEventStore(e) +} + +func ProcBlockEventColumns() []*ckdb.Column { + columns := EventColumns(false) + columns = append(columns, + ckdb.NewColumn("rule_id", ckdb.String).SetGroupBy(), + ckdb.NewColumn("target_type", ckdb.LowCardinalityString).SetGroupBy(), + ckdb.NewColumn("action", ckdb.LowCardinalityString).SetGroupBy(), + ckdb.NewColumn("mechanism", ckdb.LowCardinalityString).SetGroupBy(), + ckdb.NewColumn("guarantee", ckdb.LowCardinalityString).SetGroupBy(), + ckdb.NewColumn("errno", 
ckdb.Int32).SetGroupBy(), + ckdb.NewColumn("pid", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("parent_pid", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("root_pid", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("uid", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("gid", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("cmdline", ckdb.String).SetIgnoredInAggrTable(), + ckdb.NewColumn("exec_path", ckdb.String).SetIgnoredInAggrTable(), + ckdb.NewColumn("syscall_name", ckdb.LowCardinalityString).SetGroupBy(), + ckdb.NewColumn("syscall_id", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("policy_epoch", ckdb.UInt64).SetGroupBy(), + ckdb.NewColumn("syscall_thread", ckdb.UInt32).SetGroupBy(), + ckdb.NewColumn("syscall_coroutine", ckdb.UInt32).SetGroupBy(), + ) + return columns +} diff --git a/server/ingester/event/dbwriter/proc_block_event_column_block.go b/server/ingester/event/dbwriter/proc_block_event_column_block.go new file mode 100644 index 00000000000..26aad315ca0 --- /dev/null +++ b/server/ingester/event/dbwriter/proc_block_event_column_block.go @@ -0,0 +1,108 @@ +package dbwriter + +import ( + "github.com/ClickHouse/ch-go/proto" + "github.com/deepflowio/deepflow/server/libs/ckdb" +) + +type ProcBlockEventBlock struct { + EventBlock + ColRuleID proto.ColStr + ColTargetType *proto.ColLowCardinality[string] + ColAction *proto.ColLowCardinality[string] + ColMechanism *proto.ColLowCardinality[string] + ColGuarantee *proto.ColLowCardinality[string] + ColErrno proto.ColInt32 + ColPid proto.ColUInt32 + ColParentPid proto.ColUInt32 + ColRootPID proto.ColUInt32 + ColUID proto.ColUInt32 + ColGID proto.ColUInt32 + ColCmdline proto.ColStr + ColExecPath proto.ColStr + ColSyscallName *proto.ColLowCardinality[string] + ColSyscallID proto.ColUInt32 + ColPolicyEpoch proto.ColUInt64 + ColSyscallThread proto.ColUInt32 + ColSyscallCoroutine proto.ColUInt32 +} + +func (b *ProcBlockEventBlock) Reset() { + b.EventBlock.Reset() + b.ColRuleID.Reset() + b.ColTargetType.Reset() + b.ColAction.Reset() + 
b.ColMechanism.Reset() + b.ColGuarantee.Reset() + b.ColErrno.Reset() + b.ColPid.Reset() + b.ColParentPid.Reset() + b.ColRootPID.Reset() + b.ColUID.Reset() + b.ColGID.Reset() + b.ColCmdline.Reset() + b.ColExecPath.Reset() + b.ColSyscallName.Reset() + b.ColSyscallID.Reset() + b.ColPolicyEpoch.Reset() + b.ColSyscallThread.Reset() + b.ColSyscallCoroutine.Reset() +} + +func (b *ProcBlockEventBlock) ToInput(input proto.Input) proto.Input { + input = b.EventBlock.ToInput(input) + return append(input, + proto.InputColumn{Name: "rule_id", Data: &b.ColRuleID}, + proto.InputColumn{Name: "target_type", Data: b.ColTargetType}, + proto.InputColumn{Name: "action", Data: b.ColAction}, + proto.InputColumn{Name: "mechanism", Data: b.ColMechanism}, + proto.InputColumn{Name: "guarantee", Data: b.ColGuarantee}, + proto.InputColumn{Name: "errno", Data: &b.ColErrno}, + proto.InputColumn{Name: "pid", Data: &b.ColPid}, + proto.InputColumn{Name: "parent_pid", Data: &b.ColParentPid}, + proto.InputColumn{Name: "root_pid", Data: &b.ColRootPID}, + proto.InputColumn{Name: "uid", Data: &b.ColUID}, + proto.InputColumn{Name: "gid", Data: &b.ColGID}, + proto.InputColumn{Name: "cmdline", Data: &b.ColCmdline}, + proto.InputColumn{Name: "exec_path", Data: &b.ColExecPath}, + proto.InputColumn{Name: "syscall_name", Data: b.ColSyscallName}, + proto.InputColumn{Name: "syscall_id", Data: &b.ColSyscallID}, + proto.InputColumn{Name: "policy_epoch", Data: &b.ColPolicyEpoch}, + proto.InputColumn{Name: "syscall_thread", Data: &b.ColSyscallThread}, + proto.InputColumn{Name: "syscall_coroutine", Data: &b.ColSyscallCoroutine}, + ) +} + +func (n *ProcBlockEventStore) NewColumnBlock() ckdb.CKColumnBlock { + return &ProcBlockEventBlock{ + EventBlock: *n.EventStore.NewColumnBlock().(*EventBlock), + ColTargetType: new(proto.ColStr).LowCardinality(), + ColAction: new(proto.ColStr).LowCardinality(), + ColMechanism: new(proto.ColStr).LowCardinality(), + ColGuarantee: new(proto.ColStr).LowCardinality(), + ColSyscallName: 
new(proto.ColStr).LowCardinality(), + } +} + +func (n *ProcBlockEventStore) AppendToColumnBlock(b ckdb.CKColumnBlock) { + block := b.(*ProcBlockEventBlock) + n.EventStore.AppendToColumnBlock(&block.EventBlock) + block.ColRuleID.Append(n.RuleID) + block.ColTargetType.Append(n.TargetType) + block.ColAction.Append(n.Action) + block.ColMechanism.Append(n.Mechanism) + block.ColGuarantee.Append(n.Guarantee) + block.ColErrno.Append(n.Errno) + block.ColPid.Append(n.Pid) + block.ColParentPid.Append(n.ParentPid) + block.ColRootPID.Append(n.RootPID) + block.ColUID.Append(n.UID) + block.ColGID.Append(n.GID) + block.ColCmdline.Append(n.Cmdline) + block.ColExecPath.Append(n.ExecPath) + block.ColSyscallName.Append(n.SyscallName) + block.ColSyscallID.Append(n.SyscallID) + block.ColPolicyEpoch.Append(n.PolicyEpoch) + block.ColSyscallThread.Append(n.SyscallThread) + block.ColSyscallCoroutine.Append(n.SyscallCoroutine) +} diff --git a/server/ingester/event/dbwriter/proc_block_event_test.go b/server/ingester/event/dbwriter/proc_block_event_test.go new file mode 100644 index 00000000000..9186af6f16b --- /dev/null +++ b/server/ingester/event/dbwriter/proc_block_event_test.go @@ -0,0 +1,19 @@ +package dbwriter + +import ( + "testing" + + "github.com/deepflowio/deepflow/server/ingester/event/common" +) + +func TestAcquireProcBlockEventStoreDefaults(t *testing.T) { + store := AcquireProcBlockEventStore() + defer store.Release() + + if !store.IsIPv4 { + t.Fatalf("AcquireProcBlockEventStore() IsIPv4 = false, want true") + } + if store.StoreEventType != common.PROC_BLOCK_EVENT { + t.Fatalf("AcquireProcBlockEventStore() StoreEventType = %v, want %v", store.StoreEventType, common.PROC_BLOCK_EVENT) + } +} diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index 9b76247434e..2b0663120f1 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -222,11 +222,12 @@ type Decoder struct { } type 
ProcEventWriters struct { - FileWriter *dbwriter.EventWriter - FileAggWriter *dbwriter.EventWriter - FileMgmtWriter *dbwriter.EventWriter - ProcPermWriter *dbwriter.EventWriter - ProcOpsWriter *dbwriter.EventWriter + FileWriter *dbwriter.EventWriter + FileAggWriter *dbwriter.EventWriter + FileMgmtWriter *dbwriter.EventWriter + ProcPermWriter *dbwriter.EventWriter + ProcOpsWriter *dbwriter.EventWriter + ProcBlockWriter *dbwriter.EventWriter } func NewDecoder( @@ -350,11 +351,45 @@ func routeProcEventType(e *pb.ProcEvent) common.EventType { return common.PROC_PERM_EVENT case e.ProcLifecycleEventData != nil: return common.PROC_OPS_EVENT + case e.ProcBlockEventData != nil: + return common.PROC_BLOCK_EVENT default: return common.FILE_EVENT } } +func enforcementTargetTypeString(t pb.EnforcementTargetType) string { + switch t { + case pb.EnforcementTargetType_EnforcementTargetExec: + return "exec" + case pb.EnforcementTargetType_EnforcementTargetSyscall: + return "syscall" + default: + name := strings.TrimPrefix(t.String(), "EnforcementTarget") + if name == "" || name == t.String() { + return "unknown" + } + return strings.ToLower(name) + } +} + +func enforcementActionString(a pb.EnforcementAction) string { + switch a { + case pb.EnforcementAction_EnforcementActionAudit: + return "audit" + case pb.EnforcementAction_EnforcementActionDeny: + return "deny" + case pb.EnforcementAction_EnforcementActionSigkill: + return "sigkill" + default: + name := strings.TrimPrefix(a.String(), "EnforcementAction") + if name == "" || name == a.String() { + return "unknown" + } + return strings.ToLower(name) + } +} + func extractProcOpsCommandData(e *pb.ProcEvent) (string, string) { if e == nil || e.ProcLifecycleEventData == nil { return "", "" @@ -627,6 +662,45 @@ func (d *Decoder) writeProcOpsEvent(vtapId uint16, e *pb.ProcEvent) { s.Release() } +func (d *Decoder) writeProcBlockEvent(vtapId uint16, e *pb.ProcEvent) { + s := dbwriter.AcquireProcBlockEventStore() + 
d.initEventStoreCommon(&s.EventStore, vtapId, e) + + data := e.ProcBlockEventData + s.RuleID = data.RuleId + s.TargetType = enforcementTargetTypeString(data.TargetType) + s.EventType = s.TargetType + s.Action = enforcementActionString(data.Action) + s.Mechanism = data.Mechanism + s.Guarantee = data.Guarantee + s.Errno = data.Errno + s.Pid = data.Pid + s.ParentPid = data.ParentPid + s.RootPID = data.AiAgentRootPid + if s.RootPID == 0 { + s.RootPID = e.AiAgentRootPid + } + s.UID = data.Uid + s.GID = data.Gid + s.ProcessKName = string(data.Comm) + if s.ProcessKName == "" { + s.ProcessKName = string(e.ProcessKname) + } + s.Cmdline = string(data.Cmdline) + s.ExecPath = string(data.ExecPath) + s.SyscallName = data.SyscallName + s.SyscallID = data.SyscallId + s.PolicyEpoch = data.PolicyEpoch + s.SyscallThread = e.ThreadId + s.SyscallCoroutine = e.CoroutineId + + if d.procEventWriters != nil && d.procEventWriters.ProcBlockWriter != nil { + d.procEventWriters.ProcBlockWriter.WriteCKItem(s) + return + } + s.Release() +} + func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { switch routeProcEventType(e) { case common.FILE_EVENT: @@ -642,6 +716,8 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { d.writeProcPermEvent(vtapId, e) case common.PROC_OPS_EVENT: d.writeProcOpsEvent(vtapId, e) + case common.PROC_BLOCK_EVENT: + d.writeProcBlockEvent(vtapId, e) } } diff --git a/server/ingester/event/decoder/decoder_test.go b/server/ingester/event/decoder/decoder_test.go index e6d4b9b8e0b..1488dcae80a 100644 --- a/server/ingester/event/decoder/decoder_test.go +++ b/server/ingester/event/decoder/decoder_test.go @@ -52,6 +52,16 @@ func TestRouteProcEventType(t *testing.T) { }, want: common.PROC_OPS_EVENT, }, + { + name: "proc block goes to proc_block_event", + event: &pb.ProcEvent{ + EventType: pb.EventType_ProcBlockEvent, + ProcBlockEventData: &pb.ProcBlockEventData{ + RuleId: "block-reboot", + }, + }, + want: common.PROC_BLOCK_EVENT, + }, } for _, tt := 
range tests { diff --git a/server/ingester/event/event/event.go b/server/ingester/event/event/event.go index a1d752d60a1..e12751f650d 100644 --- a/server/ingester/event/event/event.go +++ b/server/ingester/event/event/event.go @@ -232,12 +232,17 @@ func NewEventor(eventType common.EventType, config *config.Config, recv *receive if err != nil { return nil, err } + procBlockWriter, err := dbwriter.NewEventWriter(common.PROC_BLOCK_EVENT, i, config) + if err != nil { + return nil, err + } procEventWriters = &decoder.ProcEventWriters{ - FileWriter: eventWriter, - FileAggWriter: fileAggWriter, - FileMgmtWriter: fileMgmtWriter, - ProcPermWriter: procPermWriter, - ProcOpsWriter: procOpsWriter, + FileWriter: eventWriter, + FileAggWriter: fileAggWriter, + FileMgmtWriter: fileMgmtWriter, + ProcPermWriter: procPermWriter, + ProcOpsWriter: procOpsWriter, + ProcBlockWriter: procBlockWriter, } } platformDatas[i], err = platformDataManager.NewPlatformInfoTable("event-" + eventType.String() + "-" + strconv.Itoa(i)) diff --git a/server/ingester/exporters/config/config.go b/server/ingester/exporters/config/config.go index 244d675fd49..d80b9e26e52 100644 --- a/server/ingester/exporters/config/config.go +++ b/server/ingester/exporters/config/config.go @@ -73,6 +73,7 @@ const ( PROC_OPS_EVENT L4_FLOW_LOG L7_FLOW_LOG + PROC_BLOCK_EVENT MAX_DATASOURCE_ID ) @@ -93,6 +94,7 @@ var dataSourceStrings = []string{ PROC_OPS_EVENT: "event.proc_ops_event", L4_FLOW_LOG: "flow_log.l4_flow_log", L7_FLOW_LOG: "flow_log.l7_flow_log", + PROC_BLOCK_EVENT: "event.proc_block_event", MAX_DATASOURCE_ID: "invalid_datasource", } @@ -112,6 +114,7 @@ var dataSourceTopicStrings = []string{ PROC_OPS_EVENT: TOPIC_PREFIX + dataSourceStrings[PROC_OPS_EVENT], L4_FLOW_LOG: TOPIC_PREFIX + dataSourceStrings[L4_FLOW_LOG], L7_FLOW_LOG: TOPIC_PREFIX + dataSourceStrings[L7_FLOW_LOG], + PROC_BLOCK_EVENT: TOPIC_PREFIX + dataSourceStrings[PROC_BLOCK_EVENT], MAX_DATASOURCE_ID: TOPIC_PREFIX + dataSourceStrings[MAX_DATASOURCE_ID], 
} @@ -157,7 +160,7 @@ func (d DataSourceID) TopicString() string { func (d DataSourceID) IsMap() bool { switch d { - case NETWORK_1M, APPLICATION_1M, NETWORK_1S, APPLICATION_1S, FILE_EVENT, FILE_AGG_EVENT, FILE_MGMT_EVENT, PROC_PERM_EVENT, PROC_OPS_EVENT: + case NETWORK_1M, APPLICATION_1M, NETWORK_1S, APPLICATION_1S, FILE_EVENT, FILE_AGG_EVENT, FILE_MGMT_EVENT, PROC_PERM_EVENT, PROC_OPS_EVENT, PROC_BLOCK_EVENT: return false default: return true diff --git a/server/libs/nativetag/nativetag.go b/server/libs/nativetag/nativetag.go index f4bdc0922ac..71fcf45be6c 100644 --- a/server/libs/nativetag/nativetag.go +++ b/server/libs/nativetag/nativetag.go @@ -45,6 +45,7 @@ const ( DEEPFLOW_TENANT EXT_METRICS PROFILE + EVENT_PROC_BLOCK_EVENT MAX_NATIVE_TAG_TABLE ) @@ -62,6 +63,7 @@ var NativeTagDatabaseNames = [MAX_NATIVE_TAG_TABLE]string{ DEEPFLOW_TENANT: "deepflow_tenant", EXT_METRICS: "ext_metrics", PROFILE: "profile", + EVENT_PROC_BLOCK_EVENT: "event", } var NativeTagTableNames = [MAX_NATIVE_TAG_TABLE]string{ @@ -77,6 +79,7 @@ var NativeTagTableNames = [MAX_NATIVE_TAG_TABLE]string{ DEEPFLOW_TENANT: "deepflow_collector", EXT_METRICS: "metrics", PROFILE: "in_process", + EVENT_PROC_BLOCK_EVENT: "proc_block_event", } func (t NativeTagTable) Database() string { From 2f4a974c8c04cea2ba15eb7830ef583e1101a9a2 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 17:34:11 +0800 Subject: [PATCH 05/24] feat(server): add proc block event schema --- .../schema/rawsql/mysql/dml_insert.sql | 3 ++ .../schema/rawsql/mysql/issu/7.1.0.40.sql | 1 + .../schema/rawsql/postgres/dml_insert.sql | 2 + .../migrator/schema/schema_regression_test.go | 1 + server/controller/http/service/data_source.go | 2 + .../http/service/data_source_test.go | 51 ++++++++++++++----- .../clickhouse/metrics/event/proc_block_event | 3 ++ .../metrics/event/proc_block_event.ch | 3 ++ .../metrics/event/proc_block_event.en | 3 ++ .../clickhouse/tag/event/proc_block_event | 36 +++++++++++++ 
.../clickhouse/tag/event/proc_block_event.ch | 36 +++++++++++++ .../clickhouse/tag/event/proc_block_event.en | 36 +++++++++++++ .../querier/engine/clickhouse/common/const.go | 3 +- .../clickhouse/metrics/ai_agent_events.go | 5 ++ .../engine/clickhouse/metrics/metrics.go | 5 ++ .../engine/clickhouse/metrics/metrics_test.go | 4 ++ .../engine/clickhouse/tag/description_test.go | 1 + 17 files changed, 182 insertions(+), 13 deletions(-) create mode 100644 server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event create mode 100644 server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.ch create mode 100644 server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.en create mode 100644 server/querier/db_descriptions/clickhouse/tag/event/proc_block_event create mode 100644 server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.ch create mode 100644 server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.en diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/dml_insert.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/dml_insert.sql index 34b6c570aca..fc93a4ba3c3 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/mysql/dml_insert.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/dml_insert.sql @@ -93,6 +93,9 @@ INSERT INTO data_source (id, display_name, data_table_collection, interval_time, set @lcuuid = (select uuid()); INSERT INTO data_source (id, display_name, data_table_collection, interval_time, retention_time, lcuuid) VALUES (30, '事件-进程操作事件', 'event.proc_ops_event', 0, 7*24, @lcuuid); +set @lcuuid = (select uuid()); +INSERT INTO data_source (id, display_name, data_table_collection, interval_time, retention_time, lcuuid) + VALUES (31, '事件-进程阻断事件', 'event.proc_block_event', 0, 7*24, @lcuuid); INSERT INTO region (id, name, lcuuid) values(1, '系统默认', 'ffffffff-ffff-ffff-ffff-ffffffffffff'); INSERT INTO az (id, name, lcuuid, region, 
domain) values(1, '系统默认', 'ffffffff-ffff-ffff-ffff-ffffffffffff', 'ffffffff-ffff-ffff-ffff-ffffffffffff', 'ffffffff-ffff-ffff-ffff-ffffffffffff'); diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql index f29bff0fe73..b4a677710a8 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql @@ -64,6 +64,7 @@ CALL InsertDataSourceIfNotExists('事件-文件读写聚合事件', 'event.file_ CALL InsertDataSourceIfNotExists('事件-文件管理事件', 'event.file_mgmt_event', 0, 7*24); CALL InsertDataSourceIfNotExists('事件-进程权限事件', 'event.proc_perm_event', 0, 7*24); CALL InsertDataSourceIfNotExists('事件-进程操作事件', 'event.proc_ops_event', 0, 7*24); +CALL InsertDataSourceIfNotExists('事件-进程阻断事件', 'event.proc_block_event', 0, 7*24); DROP PROCEDURE AddColumnIfNotExists; DROP PROCEDURE InsertDataSourceIfNotExists; diff --git a/server/controller/db/metadb/migrator/schema/rawsql/postgres/dml_insert.sql b/server/controller/db/metadb/migrator/schema/rawsql/postgres/dml_insert.sql index c1510be18e3..e812da4723b 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/postgres/dml_insert.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/postgres/dml_insert.sql @@ -90,6 +90,8 @@ INSERT INTO data_source (id, display_name, data_table_collection, interval_time, VALUES (29, '事件-进程权限事件', 'event.proc_perm_event', 0, 7 * 24, public.gen_random_uuid()); INSERT INTO data_source (id, display_name, data_table_collection, interval_time, retention_time, lcuuid) VALUES (30, '事件-进程操作事件', 'event.proc_ops_event', 0, 7 * 24, public.gen_random_uuid()); +INSERT INTO data_source (id, display_name, data_table_collection, interval_time, retention_time, lcuuid) +VALUES (31, '事件-进程阻断事件', 'event.proc_block_event', 0, 7 * 24, public.gen_random_uuid()); INSERT INTO region (id, name, lcuuid) VALUES (1, '系统默认', 
'ffffffff-ffff-ffff-ffff-ffffffffffff'); INSERT INTO az (id, name, lcuuid, region, domain) VALUES (1, '系统默认', 'ffffffff-ffff-ffff-ffff-ffffffffffff', 'ffffffff-ffff-ffff-ffff-ffffffffffff', 'ffffffff-ffff-ffff-ffff-ffffffffffff'); diff --git a/server/controller/db/metadb/migrator/schema/schema_regression_test.go b/server/controller/db/metadb/migrator/schema/schema_regression_test.go index 5a0435c1507..10f30f932aa 100644 --- a/server/controller/db/metadb/migrator/schema/schema_regression_test.go +++ b/server/controller/db/metadb/migrator/schema/schema_regression_test.go @@ -47,6 +47,7 @@ func TestMySQLDMLInsert_AIAgentEventDataSourcesShareDefaultRetention(t *testing. "VALUES (28, '事件-文件管理事件', 'event.file_mgmt_event', 0, 7*24, @lcuuid);", "VALUES (29, '事件-进程权限事件', 'event.proc_perm_event', 0, 7*24, @lcuuid);", "VALUES (30, '事件-进程操作事件', 'event.proc_ops_event', 0, 7*24, @lcuuid);", + "VALUES (31, '事件-进程阻断事件', 'event.proc_block_event', 0, 7*24, @lcuuid);", } for _, item := range required { if !strings.Contains(sql, item) { diff --git a/server/controller/http/service/data_source.go b/server/controller/http/service/data_source.go index 1807eb44af9..e50f665f9a7 100644 --- a/server/controller/http/service/data_source.go +++ b/server/controller/http/service/data_source.go @@ -101,6 +101,7 @@ var DEFAULT_DATA_SOURCE_DISPLAY_NAMES = []string{ "事件-文件管理事件", // event.file_mgmt_event "事件-进程权限事件", // event.proc_perm_event "事件-进程操作事件", // event.proc_ops_event + "事件-进程阻断事件", // event.proc_block_event "事件-文件读写指标", // event.file_event_metrics "事件-告警事件", // event.alert_event "应用-性能剖析", // profile.in_process @@ -114,6 +115,7 @@ var aiAgentRetentionCollections = []string{ "event.file_mgmt_event", "event.proc_perm_event", "event.proc_ops_event", + "event.proc_block_event", } func linkedRetentionCollections(collection string) []string { diff --git a/server/controller/http/service/data_source_test.go b/server/controller/http/service/data_source_test.go index 1d754a96a34..379c3d8c48c 100644 
--- a/server/controller/http/service/data_source_test.go +++ b/server/controller/http/service/data_source_test.go @@ -101,6 +101,13 @@ func Test_getTableName(t *testing.T) { }, want: "event.proc_ops_event", }, + { + name: "event.proc_block_event", + args: args{ + collection: "event.proc_block_event", + }, + want: "event.proc_block_event", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -214,6 +221,7 @@ func TestLinkedRetentionCollections(t *testing.T) { "event.file_mgmt_event", "event.proc_perm_event", "event.proc_ops_event", + "event.proc_block_event", }, }, { @@ -224,6 +232,7 @@ func TestLinkedRetentionCollections(t *testing.T) { "event.file_mgmt_event", "event.proc_perm_event", "event.proc_ops_event", + "event.proc_block_event", }, }, { @@ -234,6 +243,7 @@ func TestLinkedRetentionCollections(t *testing.T) { "event.file_mgmt_event", "event.proc_perm_event", "event.proc_ops_event", + "event.proc_block_event", }, }, { @@ -244,6 +254,18 @@ func TestLinkedRetentionCollections(t *testing.T) { "event.file_mgmt_event", "event.proc_perm_event", "event.proc_ops_event", + "event.proc_block_event", + }, + }, + { + name: "proc block event", + collection: "event.proc_block_event", + want: []string{ + "event.file_agg_event", + "event.file_mgmt_event", + "event.proc_perm_event", + "event.proc_ops_event", + "event.proc_block_event", }, }, { @@ -269,13 +291,14 @@ func TestResolveRetentionTargets(t *testing.T) { {Lcuuid: "b", DataTableCollection: "event.file_mgmt_event", RetentionTime: 24}, {Lcuuid: "c", DataTableCollection: "event.proc_perm_event", RetentionTime: 24}, {Lcuuid: "d", DataTableCollection: "event.proc_ops_event", RetentionTime: 24}, - {Lcuuid: "e", DataTableCollection: "event.event", RetentionTime: 24}, + {Lcuuid: "e", DataTableCollection: "event.proc_block_event", RetentionTime: 24}, + {Lcuuid: "f", DataTableCollection: "event.event", RetentionTime: 24}, } t.Run("ai agent event resolves to linked group", func(t *testing.T) { got := 
resolveRetentionTargets(all[0], all) want := []metadbmodel.DataSource{ - all[0], all[1], all[2], all[3], + all[0], all[1], all[2], all[3], all[4], } if !reflect.DeepEqual(got, want) { t.Fatalf("resolveRetentionTargets(ai-agent) = %v, want %v", got, want) @@ -283,8 +306,8 @@ func TestResolveRetentionTargets(t *testing.T) { }) t.Run("non linked collection resolves to self only", func(t *testing.T) { - got := resolveRetentionTargets(all[4], all) - want := []metadbmodel.DataSource{all[4]} + got := resolveRetentionTargets(all[5], all) + want := []metadbmodel.DataSource{all[5]} if !reflect.DeepEqual(got, want) { t.Fatalf("resolveRetentionTargets(non-linked) = %v, want %v", got, want) } @@ -315,7 +338,8 @@ func TestUpdateDataSourceLinksAIAgentRetentionGroup(t *testing.T) { {ID: 28, DisplayName: "事件-文件管理事件", DataTableCollection: "event.file_mgmt_event", RetentionTime: retention, Lcuuid: "b", UpdatedAt: time.Now()}, {ID: 29, DisplayName: "事件-进程权限事件", DataTableCollection: "event.proc_perm_event", RetentionTime: retention, Lcuuid: "c", UpdatedAt: time.Now()}, {ID: 30, DisplayName: "事件-进程操作事件", DataTableCollection: "event.proc_ops_event", RetentionTime: retention, Lcuuid: "d", UpdatedAt: time.Now()}, - {ID: 31, DisplayName: "事件-资源变更事件", DataTableCollection: "event.event", RetentionTime: retention, Lcuuid: "e", UpdatedAt: time.Now()}, + {ID: 31, DisplayName: "事件-进程阻断事件", DataTableCollection: "event.proc_block_event", RetentionTime: retention, Lcuuid: "e", UpdatedAt: time.Now()}, + {ID: 32, DisplayName: "事件-资源变更事件", DataTableCollection: "event.event", RetentionTime: retention, Lcuuid: "f", UpdatedAt: time.Now()}, } if err := gormDB.Create(&dataSources).Error; err != nil { t.Fatalf("insert data sources failed: %v", err) @@ -374,13 +398,13 @@ func TestUpdateDataSourceLinksAIAgentRetentionGroup(t *testing.T) { t.Fatalf("query updated data sources failed: %v", err) } - for _, item := range updated[:4] { + for _, item := range updated[:5] { if item.RetentionTime != newRetention { 
t.Fatalf("collection %s retention = %d, want %d", item.DataTableCollection, item.RetentionTime, newRetention) } } - if updated[4].RetentionTime != retention { - t.Fatalf("non-linked collection retention = %d, want %d", updated[4].RetentionTime, retention) + if updated[5].RetentionTime != retention { + t.Fatalf("non-linked collection retention = %d, want %d", updated[5].RetentionTime, retention) } wantCollections := []string{ @@ -388,6 +412,7 @@ func TestUpdateDataSourceLinksAIAgentRetentionGroup(t *testing.T) { "event.file_mgmt_event", "event.proc_perm_event", "event.proc_ops_event", + "event.proc_block_event", } if !reflect.DeepEqual(modifiedCollections, wantCollections) { t.Fatalf("CallIngesterAPIModRP collections = %v, want %v", modifiedCollections, wantCollections) @@ -418,7 +443,8 @@ func TestUpdateDataSourceMarksWholeAIAgentRetentionGroupExceptionOnFailure(t *te {ID: 28, DisplayName: "事件-文件管理事件", DataTableCollection: "event.file_mgmt_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "b", UpdatedAt: time.Now()}, {ID: 29, DisplayName: "事件-进程权限事件", DataTableCollection: "event.proc_perm_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "c", UpdatedAt: time.Now()}, {ID: 30, DisplayName: "事件-进程操作事件", DataTableCollection: "event.proc_ops_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "d", UpdatedAt: time.Now()}, - {ID: 31, DisplayName: "事件-资源变更事件", DataTableCollection: "event.event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "e", UpdatedAt: time.Now()}, + {ID: 31, DisplayName: "事件-进程阻断事件", DataTableCollection: "event.proc_block_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "e", UpdatedAt: time.Now()}, + {ID: 32, DisplayName: "事件-资源变更事件", DataTableCollection: "event.event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "f", UpdatedAt: time.Now()}, } if err := 
gormDB.Create(&dataSources).Error; err != nil { t.Fatalf("insert data sources failed: %v", err) @@ -474,7 +500,7 @@ func TestUpdateDataSourceMarksWholeAIAgentRetentionGroupExceptionOnFailure(t *te t.Fatalf("query updated data sources failed: %v", err) } - for _, item := range updated[:4] { + for _, item := range updated[:5] { if item.State != common.DATA_SOURCE_STATE_EXCEPTION { t.Fatalf("collection %s state = %d, want %d", item.DataTableCollection, item.State, common.DATA_SOURCE_STATE_EXCEPTION) } @@ -483,8 +509,8 @@ func TestUpdateDataSourceMarksWholeAIAgentRetentionGroupExceptionOnFailure(t *te } } - if updated[4].State != common.DATA_SOURCE_STATE_NORMAL { - t.Fatalf("non-linked collection state = %d, want %d", updated[4].State, common.DATA_SOURCE_STATE_NORMAL) + if updated[5].State != common.DATA_SOURCE_STATE_NORMAL { + t.Fatalf("non-linked collection state = %d, want %d", updated[5].State, common.DATA_SOURCE_STATE_NORMAL) } } @@ -512,6 +538,7 @@ func TestUpdateDataSourceRollsBackLinkedRetentionGroupOnMetadbFailure(t *testing {ID: 28, DisplayName: "事件-文件管理事件", DataTableCollection: "event.file_mgmt_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "b", UpdatedAt: time.Now()}, {ID: 29, DisplayName: "事件-进程权限事件", DataTableCollection: "event.proc_perm_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "c", UpdatedAt: time.Now()}, {ID: 30, DisplayName: "事件-进程操作事件", DataTableCollection: "event.proc_ops_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "d", UpdatedAt: time.Now()}, + {ID: 31, DisplayName: "事件-进程阻断事件", DataTableCollection: "event.proc_block_event", RetentionTime: retention, State: common.DATA_SOURCE_STATE_NORMAL, Lcuuid: "e", UpdatedAt: time.Now()}, } if err := gormDB.Create(&dataSources).Error; err != nil { t.Fatalf("insert data sources failed: %v", err) diff --git a/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event 
b/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event new file mode 100644 index 00000000000..c5908a24e5e --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event @@ -0,0 +1,3 @@ +# Field , DBField , Type , Category , Permission +log_count , , counter , Throughput , 111 +row , , other , Other , 111 diff --git a/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.ch b/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.ch new file mode 100644 index 00000000000..cc796d44297 --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.ch @@ -0,0 +1,3 @@ +# Field , DisplayName , Unit , Description +log_count , 日志总量 , 个 , +row , 行数 , 个 , diff --git a/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.en b/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.en new file mode 100644 index 00000000000..5c634710cae --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/metrics/event/proc_block_event.en @@ -0,0 +1,3 @@ +# Field , DisplayName , Unit , Description +log_count , Log Count , , +row , Row Count , , diff --git a/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event b/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event new file mode 100644 index 00000000000..6fd95efa1f0 --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event @@ -0,0 +1,36 @@ +# Name , ClientName , ServerName , Type , EnumFile , Category , Permission , Deprecated +time_str , time_str , time_str , time , , Timestamp , 111 , 0 +_id , _id , _id , id , , Event Info , 111 , 0 +time , time , time , time , , Event Info , 111 , 0 +start_time , start_time , start_time , int , , Event Info , 111 , 0 +end_time , end_time , end_time , int , , Event Info , 111 , 0 +region , region , region , resource , , Universal Tag , 110 , 0 +pod , pod , pod , resource , , Universal Tag 
, 111 , 0 +gprocess , gprocess , gprocess , resource , , Universal Tag , 111 , 0 +gprocess.biz_type , gprocess.biz_type , gprocess.biz_type , int_enum , biz_type , Business Info , 111 , 0 +ip , ip , ip , ip , , Network Layer , 111 , 0 +is_ipv4 , is_ipv4 , is_ipv4 , int_enum , ip_type , Network Layer , 111 , 0 + +event_type , event_type , event_type , string , , Event Info , 111 , 0 +process_kname , process_kname , process_kname , string , , Service Info , 111 , 0 +app_instance , app_instance , app_instance , string , , Service Info , 111 , 0 +agent , agent , agent , resource , , Capture Info , 111 , 0 +signal_source , signal_source , signal_source , int_enum , file_event_signal_source, Capture Info , 111 , 0 +rule_id , rule_id , rule_id , string , , Event Info , 111 , 0 +target_type , target_type , target_type , string , , Event Info , 111 , 0 +action , action , action , string , , Event Info , 111 , 0 +mechanism , mechanism , mechanism , string , , Event Info , 111 , 0 +guarantee , guarantee , guarantee , string , , Event Info , 111 , 0 +errno , errno , errno , int , , Event Info , 111 , 0 +pid , pid , pid , int , , Event Info , 111 , 0 +parent_pid , parent_pid , parent_pid , int , , Event Info , 111 , 0 +root_pid , root_pid , root_pid , int , , Event Info , 111 , 0 +uid , uid , uid , int , , Event Info , 111 , 0 +gid , gid , gid , int , , Event Info , 111 , 0 +cmdline , cmdline , cmdline , string , , Event Info , 111 , 0 +exec_path , exec_path , exec_path , string , , Event Info , 111 , 0 +syscall_name , syscall_name , syscall_name , string , , Event Info , 111 , 0 +syscall_id , syscall_id , syscall_id , int , , Event Info , 111 , 0 +policy_epoch , policy_epoch , policy_epoch , int , , Event Info , 111 , 0 +syscall_thread , syscall_thread , syscall_thread , int , , Tracing Info , 111 , 0 +syscall_coroutine , syscall_coroutine , syscall_coroutine , int , , Tracing Info , 111 , 0 diff --git a/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.ch 
b/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.ch new file mode 100644 index 00000000000..2791f3013fd --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.ch @@ -0,0 +1,36 @@ +# Name , DisplayName , Description +time_str , time_str , +_id , _id , +time , time , +start_time , start_time , +end_time , end_time , +region , region , +pod , pod , +gprocess , gprocess , +gprocess.biz_type , 进程业务类型 , +ip , ip , +is_ipv4 , is_ipv4 , + +event_type , event_type , +process_kname , process_kname , +app_instance , app_instance , +agent , agent , +signal_source , signal_source , +rule_id , 规则 ID , 命中的阻断规则 ID +target_type , 阻断目标类型 , exec 或 syscall +action , 处置动作 , audit、deny 或 sigkill +mechanism , 阻断机制 , lsm、kprobe_override、sigkill、seccomp 或 user_space_audit +guarantee , 阻断保证 , prevented、best_effort 或 audit_only +errno , errno , 返回给进程的错误码 +pid , pid , +parent_pid , parent_pid , +root_pid , root_pid , +uid , uid , +gid , gid , +cmdline , cmdline , +exec_path , exec_path , +syscall_name , syscall_name , +syscall_id , syscall_id , +policy_epoch , policy_epoch , 策略版本 +syscall_thread , syscall_thread , +syscall_coroutine , syscall_coroutine , diff --git a/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.en b/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.en new file mode 100644 index 00000000000..ff1c4615e02 --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/tag/event/proc_block_event.en @@ -0,0 +1,36 @@ +# Name , DisplayName , Description +time_str , time_str , +_id , _id , +time , time , +start_time , start_time , +end_time , end_time , +region , region , +pod , pod , +gprocess , gprocess , +gprocess.biz_type , Process Business Type , +ip , ip , +is_ipv4 , is_ipv4 , + +event_type , event_type , +process_kname , process_kname , +app_instance , app_instance , +agent , agent , +signal_source , signal_source , +rule_id , Rule ID , Matched enforcement rule ID +target_type , 
Target Type , exec or syscall +action , Action , audit, deny, or sigkill +mechanism , Mechanism , lsm, kprobe_override, sigkill, seccomp, or user_space_audit +guarantee , Guarantee , prevented, best_effort, or audit_only +errno , errno , Error code returned to the process +pid , pid , +parent_pid , parent_pid , +root_pid , root_pid , +uid , uid , +gid , gid , +cmdline , cmdline , +exec_path , exec_path , +syscall_name , syscall_name , +syscall_id , syscall_id , +policy_epoch , Policy Epoch , Policy version +syscall_thread , syscall_thread , +syscall_coroutine , syscall_coroutine , diff --git a/server/querier/engine/clickhouse/common/const.go b/server/querier/engine/clickhouse/common/const.go index fbb834de2aa..d8463a6bb76 100644 --- a/server/querier/engine/clickhouse/common/const.go +++ b/server/querier/engine/clickhouse/common/const.go @@ -38,6 +38,7 @@ const TABLE_NAME_FILE_AGG_EVENT = "file_agg_event" const TABLE_NAME_FILE_MGMT_EVENT = "file_mgmt_event" const TABLE_NAME_PROC_PERM_EVENT = "proc_perm_event" const TABLE_NAME_PROC_OPS_EVENT = "proc_ops_event" +const TABLE_NAME_PROC_BLOCK_EVENT = "proc_block_event" const TABLE_NAME_IN_PROCESS = "in_process" const TABLE_NAME_IN_PROCESS_METRICS = "in_process_metrics" const TABLE_NAME_FILE_EVENT_METRICS = "file_event_metrics" @@ -73,7 +74,7 @@ var DB_TABLE_MAP = map[string][]string{ DB_NAME_EXT_METRICS: []string{"ext_common"}, DB_NAME_DEEPFLOW_ADMIN: []string{"deepflow_server"}, DB_NAME_DEEPFLOW_TENANT: []string{"deepflow_collector"}, - DB_NAME_EVENT: []string{"event", "file_event", TABLE_NAME_FILE_AGG_EVENT, TABLE_NAME_FILE_MGMT_EVENT, TABLE_NAME_PROC_PERM_EVENT, TABLE_NAME_PROC_OPS_EVENT, TABLE_NAME_ALERT_EVENT, TABLE_NAME_ALERT_RECORD, TABLE_NAME_FILE_EVENT_METRICS}, + DB_NAME_EVENT: []string{"event", "file_event", TABLE_NAME_FILE_AGG_EVENT, TABLE_NAME_FILE_MGMT_EVENT, TABLE_NAME_PROC_PERM_EVENT, TABLE_NAME_PROC_OPS_EVENT, TABLE_NAME_PROC_BLOCK_EVENT, TABLE_NAME_ALERT_EVENT, TABLE_NAME_ALERT_RECORD, 
TABLE_NAME_FILE_EVENT_METRICS}, DB_NAME_PROFILE: []string{"in_process", TABLE_NAME_IN_PROCESS_METRICS}, DB_NAME_PROMETHEUS: []string{"samples"}, DB_NAME_APPLICATION_LOG: []string{"log"}, diff --git a/server/querier/engine/clickhouse/metrics/ai_agent_events.go b/server/querier/engine/clickhouse/metrics/ai_agent_events.go index e268e450069..ddbe17e0b3b 100644 --- a/server/querier/engine/clickhouse/metrics/ai_agent_events.go +++ b/server/querier/engine/clickhouse/metrics/ai_agent_events.go @@ -19,3 +19,8 @@ var PROC_OPS_EVENT_METRICS = map[string]*Metrics{} var PROC_OPS_EVENT_METRICS_REPLACE = map[string]*Metrics{ "log_count": NewReplaceMetrics("1", ""), } + +var PROC_BLOCK_EVENT_METRICS = map[string]*Metrics{} +var PROC_BLOCK_EVENT_METRICS_REPLACE = map[string]*Metrics{ + "log_count": NewReplaceMetrics("1", ""), +} diff --git a/server/querier/engine/clickhouse/metrics/metrics.go b/server/querier/engine/clickhouse/metrics/metrics.go index 876693b79b8..d667a8b603f 100644 --- a/server/querier/engine/clickhouse/metrics/metrics.go +++ b/server/querier/engine/clickhouse/metrics/metrics.go @@ -378,6 +378,8 @@ func GetMetricsByDBTableStatic(db string, table string, customMetrics map[string return PROC_PERM_EVENT_METRICS case ckcommon.TABLE_NAME_PROC_OPS_EVENT: return PROC_OPS_EVENT_METRICS + case ckcommon.TABLE_NAME_PROC_BLOCK_EVENT: + return PROC_BLOCK_EVENT_METRICS case ckcommon.TABLE_NAME_ALERT_EVENT, ckcommon.TABLE_NAME_ALERT_RECORD: return GetAlarmEventMetrics() case ckcommon.TABLE_NAME_FILE_EVENT_METRICS: @@ -754,6 +756,9 @@ func MergeMetrics(db string, table string, loadMetrics map[string]*Metrics) erro case ckcommon.TABLE_NAME_PROC_OPS_EVENT: metrics = PROC_OPS_EVENT_METRICS replaceMetrics = PROC_OPS_EVENT_METRICS_REPLACE + case ckcommon.TABLE_NAME_PROC_BLOCK_EVENT: + metrics = PROC_BLOCK_EVENT_METRICS + replaceMetrics = PROC_BLOCK_EVENT_METRICS_REPLACE case ckcommon.TABLE_NAME_ALERT_EVENT, ckcommon.TABLE_NAME_ALERT_RECORD: metrics = ALARM_EVENT_METRICS 
replaceMetrics = ALARM_EVENT_METRICS_REPLACE diff --git a/server/querier/engine/clickhouse/metrics/metrics_test.go b/server/querier/engine/clickhouse/metrics/metrics_test.go index cdd01cc1ce3..be7d8c26603 100644 --- a/server/querier/engine/clickhouse/metrics/metrics_test.go +++ b/server/querier/engine/clickhouse/metrics/metrics_test.go @@ -101,6 +101,9 @@ func TestCheckDBField(t *testing.T) { case chCommon.TABLE_NAME_PROC_OPS_EVENT: metrics = PROC_OPS_EVENT_METRICS replaceMetrics = PROC_OPS_EVENT_METRICS_REPLACE + case chCommon.TABLE_NAME_PROC_BLOCK_EVENT: + metrics = PROC_BLOCK_EVENT_METRICS + replaceMetrics = PROC_BLOCK_EVENT_METRICS_REPLACE } } if metrics == nil { @@ -152,6 +155,7 @@ func TestLoadMetricsForNewAiAgentTables(t *testing.T) { chCommon.TABLE_NAME_FILE_MGMT_EVENT, chCommon.TABLE_NAME_PROC_PERM_EVENT, chCommon.TABLE_NAME_PROC_OPS_EVENT, + chCommon.TABLE_NAME_PROC_BLOCK_EVENT, } for _, table := range tables { loadMetrics, err := LoadMetrics("event", table, metricData.(map[string]interface{})) diff --git a/server/querier/engine/clickhouse/tag/description_test.go b/server/querier/engine/clickhouse/tag/description_test.go index c7747c368af..c871a5ffeb6 100644 --- a/server/querier/engine/clickhouse/tag/description_test.go +++ b/server/querier/engine/clickhouse/tag/description_test.go @@ -70,6 +70,7 @@ func TestGProcessBizTypeTagDescriptions(t *testing.T) { {"event", "file_mgmt_event", "gprocess.biz_type", "int_enum", "biz_type"}, {"event", "proc_perm_event", "gprocess.biz_type", "int_enum", "biz_type"}, {"event", "proc_ops_event", "gprocess.biz_type", "int_enum", "biz_type"}, + {"event", "proc_block_event", "gprocess.biz_type", "int_enum", "biz_type"}, } for _, tc := range tests { From da5c0caa2871d21213fde32b2ee5da7e944b49fc Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 17:41:54 +0800 Subject: [PATCH 06/24] feat(agent): detect kernel enforcement capabilities --- agent/src/common/kernel_capability.rs | 116 ++++++++++++++++++++++++++ 
agent/src/common/mod.rs | 1 + agent/src/trident.rs | 7 ++ 3 files changed, 124 insertions(+) create mode 100644 agent/src/common/kernel_capability.rs diff --git a/agent/src/common/kernel_capability.rs b/agent/src/common/kernel_capability.rs new file mode 100644 index 00000000000..d7eeb35df84 --- /dev/null +++ b/agent/src/common/kernel_capability.rs @@ -0,0 +1,116 @@ +use std::{ + fs, + io::{Cursor, Read}, + path::Path, +}; + +use flate2::read::GzDecoder; + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct KernelCapability { + pub bpf_lsm_configured: bool, + pub bpf_lsm_active: bool, + pub bpf_kprobe_override_configured: bool, + pub seccomp_filter_configured: bool, + pub btf_vmlinux_available: bool, +} + +impl KernelCapability { + pub fn detect() -> Self { + let lsm_text = fs::read_to_string("/sys/kernel/security/lsm").unwrap_or_default(); + let config_text = read_kernel_config().unwrap_or_default(); + + Self { + bpf_lsm_configured: config_enabled(&config_text, "CONFIG_BPF_LSM"), + bpf_lsm_active: lsm_has_bpf(&lsm_text), + bpf_kprobe_override_configured: config_enabled( + &config_text, + "CONFIG_BPF_KPROBE_OVERRIDE", + ), + seccomp_filter_configured: config_enabled(&config_text, "CONFIG_SECCOMP_FILTER"), + btf_vmlinux_available: Path::new("/sys/kernel/btf/vmlinux").exists(), + } + } + + pub fn supports_exec_lsm_enforcement(&self) -> bool { + self.bpf_lsm_configured && self.bpf_lsm_active + } +} + +fn lsm_has_bpf(lsm_text: &str) -> bool { + lsm_text + .trim() + .split(',') + .map(str::trim) + .any(|name| name == "bpf") +} + +fn config_enabled(config_text: &str, option: &str) -> bool { + let enabled = format!("{option}=y"); + config_text + .lines() + .map(str::trim) + .any(|line| line == enabled) +} + +fn read_kernel_config() -> Option { + if let Some(config) = read_boot_kernel_config() { + return Some(config); + } + read_proc_kernel_config() +} + +fn read_boot_kernel_config() -> Option { + let release = 
fs::read_to_string("/proc/sys/kernel/osrelease").ok()?; + let path = format!("/boot/config-{}", release.trim()); + fs::read_to_string(path).ok() +} + +fn read_proc_kernel_config() -> Option { + let compressed = fs::read("/proc/config.gz").ok()?; + decode_gzip(&compressed).ok() +} + +fn decode_gzip(bytes: &[u8]) -> Result { + let mut decoder = GzDecoder::new(Cursor::new(bytes)); + let mut output = String::new(); + decoder.read_to_string(&mut output)?; + Ok(output) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_lsm_detects_bpf() { + assert!(lsm_has_bpf("lockdown,capability,yama,apparmor,bpf")); + assert!(!lsm_has_bpf("lockdown,capability,yama,apparmor")); + } + + #[test] + fn parse_config_detects_bpf_lsm() { + assert!(config_enabled("CONFIG_BPF_LSM=y\n", "CONFIG_BPF_LSM")); + assert!(!config_enabled( + "# CONFIG_BPF_LSM is not set\n", + "CONFIG_BPF_LSM" + )); + } + + #[test] + fn support_exec_lsm_requires_config_and_active_lsm() { + assert!(KernelCapability { + bpf_lsm_configured: true, + bpf_lsm_active: true, + ..Default::default() + } + .supports_exec_lsm_enforcement()); + + assert!(!KernelCapability { + bpf_lsm_configured: true, + bpf_lsm_active: false, + ..Default::default() + } + .supports_exec_lsm_enforcement()); + } +} diff --git a/agent/src/common/mod.rs b/agent/src/common/mod.rs index 5737924a3b1..05a8524a126 100644 --- a/agent/src/common/mod.rs +++ b/agent/src/common/mod.rs @@ -21,6 +21,7 @@ pub mod endpoint; mod error; pub mod feature; pub mod flow; +pub mod kernel_capability; pub mod l7_protocol_info; pub mod l7_protocol_log; pub mod lookup_key; diff --git a/agent/src/trident.rs b/agent/src/trident.rs index 4019bf235f9..44717794b9d 100644 --- a/agent/src/trident.rs +++ b/agent/src/trident.rs @@ -57,6 +57,7 @@ use crate::{ common::{ enums::CaptureNetworkType, flow::L7Stats, + kernel_capability::KernelCapability, tagged_flow::{BoxedTaggedFlow, TaggedFlow}, tap_types::CaptureNetworkTyper, FeatureFlags, DEFAULT_LOG_RETENTION, 
/// AI Agent exec-enforcement policy types and lookup entry points.
///
/// As written here, this module never yields a policy or a match:
/// [`global_exec_policy`] and [`CompiledExecPolicy::match_exec`] both return
/// `None` unconditionally.
// NOTE(review): presumably this is the placeholder used when the real
// enterprise implementation is not linked in — confirm.
pub mod ai_agent_enforcement {
    use std::sync::Arc;

    /// How a matched rule is applied.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub enum EnforcementMode {
        AuditOnly,
        Block,
    }

    /// Result of matching an exec against a compiled policy.
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct PolicyHit {
        /// Index of the matched rule.
        pub rule_index: u32,
        /// Identifier of the matched rule.
        pub rule_id: String,
        /// Mode under which the matched rule is applied.
        pub mode: EnforcementMode,
    }

    /// A compiled exec policy tagged with the epoch it was built for.
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct CompiledExecPolicy {
        pub epoch: u64,
    }

    impl CompiledExecPolicy {
        /// Matches an executed path and command line against the policy.
        ///
        /// This implementation ignores both arguments and always returns `None`.
        pub fn match_exec(&self, _exec_path: &str, _cmdline: &str) -> Option<PolicyHit> {
            None
        }
    }

    /// Returns the process-wide exec policy, if any.
    ///
    /// This implementation always returns `None`.
    pub fn global_exec_policy() -> Option<Arc<CompiledExecPolicy>> {
        None
    }
}
b/agent/src/common/proc_event/linux.rs @@ -313,6 +313,17 @@ pub struct ProcLifecycleInfo { pub parent_pid: u32, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ProcLifecycleExecInfo<'a> { + pub pid: u32, + pub parent_pid: u32, + pub uid: u32, + pub gid: u32, + pub comm: &'a [u8], + pub cmdline: &'a [u8], + pub exec_path: &'a [u8], +} + impl TryFrom<&[u8]> for ProcLifecycleEventData { type Error = Error; @@ -390,6 +401,13 @@ const PROC_BLOCK_CMDLINE_OFF: usize = 128; const PROC_BLOCK_EXEC_PATH_OFF: usize = 384; const PROC_BLOCK_SYSCALL_NAME_OFF: usize = 640; const PROC_BLOCK_EVENT_SIZE: usize = 672; +const PROC_BLOCK_RULE_ID_LEN: usize = 64; +const PROC_BLOCK_CMDLINE_LEN: usize = 256; +const PROC_BLOCK_EXEC_PATH_LEN: usize = 256; +const ENFORCEMENT_TARGET_EXEC: u8 = 1; +const ENFORCEMENT_ACTION_AUDIT: u8 = 1; +const ENFORCEMENT_MECHANISM_USER_SPACE_AUDIT: u8 = 5; +const ENFORCEMENT_GUARANTEE_AUDIT_ONLY: u8 = 3; struct ProcBlockEventData { rule_id: String, @@ -698,6 +716,80 @@ impl ProcEvent { _ => None, } } + + pub fn proc_lifecycle_exec_info(&self) -> Option> { + match &self.event_data { + EventData::ProcLifecycleEvent(data) if data.lifecycle_type == PROC_LIFECYCLE_EXEC => { + Some(ProcLifecycleExecInfo { + pid: data.pid, + parent_pid: data.parent_pid, + uid: data.uid, + gid: data.gid, + comm: &data.comm, + cmdline: &data.cmdline, + exec_path: &data.exec_path, + }) + } + _ => None, + } + } + + pub fn new_proc_block_event_for_audit( + &self, + rule_id: &str, + policy_epoch: u64, + ) -> Option { + let data = match &self.event_data { + EventData::ProcLifecycleEvent(data) if data.lifecycle_type == PROC_LIFECYCLE_EXEC => { + data + } + _ => return None, + }; + let process_kname = if data.comm.is_empty() { + self.process_kname.clone() + } else { + data.comm.clone() + }; + let root_pid = self.ai_agent_root_pid; + let block_event = ProcBlockEventData { + rule_id: rule_id.chars().take(PROC_BLOCK_RULE_ID_LEN).collect(), + target_type: ENFORCEMENT_TARGET_EXEC, + 
action: ENFORCEMENT_ACTION_AUDIT, + mechanism: enforcement_mechanism_name(ENFORCEMENT_MECHANISM_USER_SPACE_AUDIT) + .to_string(), + guarantee: enforcement_guarantee_name(ENFORCEMENT_GUARANTEE_AUDIT_ONLY).to_string(), + errno: 0, + pid: data.pid, + parent_pid: data.parent_pid, + ai_agent_root_pid: root_pid, + uid: data.uid, + gid: data.gid, + comm: process_kname.clone(), + cmdline: truncate_bytes(&data.cmdline, PROC_BLOCK_CMDLINE_LEN), + exec_path: truncate_bytes(&data.exec_path, PROC_BLOCK_EXEC_PATH_LEN), + syscall_name: String::new(), + syscall_id: 0, + timestamp: data.timestamp, + policy_epoch, + }; + + Some(BoxedProcEvents(Box::new(ProcEvent { + pid: data.pid, + pod_id: self.pod_id, + ai_agent_root_pid: root_pid, + thread_id: self.thread_id, + coroutine_id: self.coroutine_id, + process_kname, + start_time: self.start_time, + end_time: self.end_time, + event_type: EventType::ProcBlockEvent, + event_data: EventData::ProcBlockEvent(block_event), + }))) + } +} + +fn truncate_bytes(bytes: &[u8], limit: usize) -> Vec { + bytes.iter().copied().take(limit).collect() } #[derive(Debug)] @@ -860,6 +952,48 @@ mod tests { assert_eq!(pb.mechanism, "lsm"); } + #[test] + fn test_new_proc_block_event_for_audit_encodes_proc_block_event() { + let proc_event = ProcEvent { + pid: 13, + pod_id: 7, + ai_agent_root_pid: 100, + thread_id: 13, + coroutine_id: 0, + process_kname: b"reboot".to_vec(), + start_time: 42, + end_time: 43, + event_type: EventType::ProcLifecycleEvent, + event_data: EventData::ProcLifecycleEvent(ProcLifecycleEventData { + lifecycle_type: PROC_LIFECYCLE_EXEC, + pid: 13, + parent_pid: 10, + uid: 1000, + gid: 1000, + timestamp: 42, + comm: b"reboot".to_vec(), + cmdline: b"reboot now".to_vec(), + exec_path: b"/sbin/reboot".to_vec(), + }), + }; + + let boxed = proc_event + .new_proc_block_event_for_audit("block-reboot", 99) + .unwrap(); + let mut buf = Vec::new(); + boxed.encode(&mut buf).unwrap(); + let pb = metric::ProcEvent::decode(buf.as_slice()).unwrap(); + let 
block = pb.proc_block_event_data.unwrap(); + + assert_eq!(pb.event_type, metric::EventType::ProcBlockEvent as i32); + assert_eq!(block.rule_id, "block-reboot"); + assert_eq!(block.action, metric::EnforcementAction::Audit as i32); + assert_eq!(block.mechanism, "user_space_audit"); + assert_eq!(block.guarantee, "audit_only"); + assert_eq!(block.ai_agent_root_pid, 100); + assert_eq!(block.exec_path, b"/sbin/reboot"); + } + fn make_proc_block_raw( target_type: u8, action: u8, diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index cce6e272c73..8c7a903c2d6 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -185,6 +185,47 @@ fn fill_ai_agent_root_pid(event: &mut BoxedProcEvents) { } } +#[cfg(feature = "enterprise")] +#[allow(static_mut_refs)] +fn emit_ai_agent_enforcement_audit_event(event: &BoxedProcEvents) { + use enterprise_utils::ai_agent_enforcement::EnforcementMode; + + if event.0.ai_agent_root_pid == 0 { + return; + } + let Some(exec_info) = event.0.proc_lifecycle_exec_info() else { + return; + }; + if exec_info.exec_path.is_empty() { + return; + } + let Some(policy) = enterprise_utils::ai_agent_enforcement::global_exec_policy() else { + return; + }; + let exec_path = String::from_utf8_lossy(exec_info.exec_path); + let cmdline = String::from_utf8_lossy(exec_info.cmdline); + let Some(hit) = policy.match_exec(&exec_path, &cmdline) else { + return; + }; + if hit.mode != EnforcementMode::AuditOnly { + return; + } + let Some(audit_event) = event + .0 + .new_proc_block_event_for_audit(&hit.rule_id, policy.epoch) + else { + return; + }; + + unsafe { + if let Some(sender) = PROC_EVENT_SENDER.as_mut() { + if let Err(e) = sender.send(audit_event) { + warn!("ai agent enforcement audit event send error: {:?}", e); + } + } + } +} + impl OwnedCountable for SyncEbpfCounter { fn get_counters(&self) -> Vec { let rx = self.counter.rx.swap(0, Ordering::Relaxed); @@ -715,6 +756,8 @@ impl EbpfCollector { 
register_ai_agent_child(&event); #[cfg(feature = "enterprise")] fill_ai_agent_root_pid(&mut event); + #[cfg(feature = "enterprise")] + emit_ai_agent_enforcement_audit_event(&event); if let Err(e) = PROC_EVENT_SENDER.as_mut().unwrap().send(event) { warn!("event send ebpf error: {:?}", e); } From c89487c463847d924b6de2141829b8bd6809751e Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 18:20:38 +0800 Subject: [PATCH 08/24] feat(ebpf): support loading lsm programs --- .../test/test_ai_agent_source_contracts.py | 40 +++++- agent/src/ebpf/user/load.c | 6 + agent/src/ebpf/user/probe.c | 55 ++++++++ agent/src/ebpf/user/probe.h | 1 + agent/src/ebpf/user/tracer.c | 126 +++++++++++++++++- agent/src/ebpf/user/tracer.h | 9 ++ 6 files changed, 233 insertions(+), 4 deletions(-) diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index c1edc55c998..37532ad9cab 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -8,6 +8,10 @@ SOCKET_TRACE = ROOT / "kernel" / "socket_trace.bpf.c" FILES_RW = ROOT / "kernel" / "files_rw.bpf.c" SOCKET_C = ROOT / "user" / "socket.c" +LOAD_C = ROOT / "user" / "load.c" +PROBE_C = ROOT / "user" / "probe.c" +TRACER_H = ROOT / "user" / "tracer.h" +TRACER_C = ROOT / "user" / "tracer.c" def require(condition: bool, message: str) -> None: @@ -16,9 +20,17 @@ def require(condition: bool, message: str) -> None: sys.exit(1) -socket_trace_text = SOCKET_TRACE.read_text() -files_rw_text = FILES_RW.read_text() -socket_c_text = SOCKET_C.read_text() +def read_source(path: Path) -> str: + return path.read_text(encoding="utf-8") + + +socket_trace_text = read_source(SOCKET_TRACE) +files_rw_text = read_source(FILES_RW) +socket_c_text = read_source(SOCKET_C) +load_text = read_source(LOAD_C) +probe_text = read_source(PROBE_C) +tracer_h_text = read_source(TRACER_H) +tracer_c_text = read_source(TRACER_C) reasm_idx = 
socket_trace_text.find("socket_info_ptr->reasm_bytes = 0;") finish_idx = socket_trace_text.find("socket_info_ptr->finish_reasm = false;") @@ -116,4 +128,26 @@ def require(condition: bool, message: str) -> None: "AI Agent access_permission extraction must be guarded by EXTENDED_AI_AGENT_FILE_IO_FULL", ) +require('"lsm/"' in load_text, "load.c must recognize lsm/ section prefix") +require( + "BPF_PROG_TYPE_LSM" in load_text, + "load.c must map lsm/ programs to BPF_PROG_TYPE_LSM", +) +require( + "program__attach_lsm" in probe_text, + "probe.c must provide an LSM attach helper", +) +require( + "bpf_raw_tracepoint_open" in probe_text, + "LSM attach helper must use the raw tracepoint attach syscall path", +) +require( + "struct lsm_prog" in tracer_h_text and "lsms_count" in tracer_h_text, + "tracer.h must keep LSM program attach state", +) +require( + "lsm_programs_handle" in tracer_c_text, + "tracer.c must include LSM programs in the attach lifecycle", +) + print("[OK]") diff --git a/agent/src/ebpf/user/load.c b/agent/src/ebpf/user/load.c index 05bcf36889f..1fe6d97ee06 100644 --- a/agent/src/ebpf/user/load.c +++ b/agent/src/ebpf/user/load.c @@ -47,6 +47,10 @@ #include "profile/perf_profiler.h" #include "unwind_tracer.h" +#ifndef BPF_PROG_TYPE_LSM +#define BPF_PROG_TYPE_LSM 29 +#endif + /* * When full map preallocation is too expensive, the 'BPF_F_NO_PREALLOC' * flag can be used to define a map without preallocated memory. 
By @@ -621,6 +625,8 @@ static enum bpf_prog_type get_prog_type(struct sec_desc *desc) prog_type = BPF_PROG_TYPE_KPROBE; } else if (!memcmp(desc->name, "tracepoint/", 11)) { prog_type = BPF_PROG_TYPE_TRACEPOINT; + } else if (!memcmp(desc->name, "lsm/", 4)) { + prog_type = BPF_PROG_TYPE_LSM; } else if (!memcmp(desc->name, "perf_event", 10)) { prog_type = BPF_PROG_TYPE_PERF_EVENT; } else if (!memcmp(desc->name, "fentry/", 7) || diff --git a/agent/src/ebpf/user/probe.c b/agent/src/ebpf/user/probe.c index 3c66d8e7964..5f7e39f4e59 100644 --- a/agent/src/ebpf/user/probe.c +++ b/agent/src/ebpf/user/probe.c @@ -13,7 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include +#include #include +#include #include #include #include @@ -27,6 +30,7 @@ extern bool *cpu_online; extern int sys_cpus_count; extern int ioctl(int fd, unsigned long request, ...); +extern int bpf_raw_tracepoint_open(const char *name, int prog_fd); int bpf_get_program_fd(void *obj, const char *name, void **p) { @@ -481,6 +485,57 @@ struct ebpf_link *program__attach_kfunc(void *prog) return link; } +struct ebpf_link *program__attach_lsm(void *prog) +{ + struct ebpf_prog *ebpf_prog; + struct ebpf_link *link = NULL; + const char *hook; + int pfd; + + if (prog == NULL) { + ebpf_warning("prog is NULL.\n"); + return NULL; + } + + ebpf_prog = prog; + if (strncmp(ebpf_prog->sec_name, "lsm/", 4)) { + ebpf_warning("lsm section name %s is invalid.\n", + ebpf_prog->sec_name); + return NULL; + } + + hook = ebpf_prog->sec_name + 4; + if (hook[0] == '\0') { + ebpf_warning("lsm hook name is empty, section:%s.\n", + ebpf_prog->sec_name); + return NULL; + } + + pfd = bpf_raw_tracepoint_open(hook, ebpf_prog->prog_fd); + if (pfd < 0) { + if (errno == EOPNOTSUPP || errno == EINVAL) { + ebpf_warning("BPF LSM attach unsupported for %s: %s(%d)\n", + hook, strerror(errno), errno); + } else { + ebpf_warning("BPF LSM attach failed for %s: %s(%d)\n", + hook, 
strerror(errno), errno); + } + return NULL; + } + + link = calloc(1, sizeof(*link)); + if (!link) { + close(pfd); + ebpf_warning("Call calloc() is failed.\n"); + return NULL; + } + + link->detach = ebpf_link__detach_kfunc; + link->fd = pfd; + + return link; +} + /** * attach perf event * diff --git a/agent/src/ebpf/user/probe.h b/agent/src/ebpf/user/probe.h index d751208746e..edb6d2c8d69 100644 --- a/agent/src/ebpf/user/probe.h +++ b/agent/src/ebpf/user/probe.h @@ -63,6 +63,7 @@ int program__detach_probe(struct ebpf_link *link, int bpf_get_program_fd(void *obj, const char *prog_name, void **p); struct ebpf_link *program__attach_tracepoint(void *prog); +struct ebpf_link *program__attach_lsm(void *prog); int program__attach_perf_event(int prog_fd, uint32_t ev_type, uint32_t ev_config, uint64_t sample_period, uint64_t sample_freq, pid_t pid, diff --git a/agent/src/ebpf/user/tracer.c b/agent/src/ebpf/user/tracer.c index a949cfdfe9c..67d0e464759 100644 --- a/agent/src/ebpf/user/tracer.c +++ b/agent/src/ebpf/user/tracer.c @@ -49,6 +49,10 @@ #include "deepflow_ebpfctl_bin.c" +#ifndef BPF_PROG_TYPE_LSM +#define BPF_PROG_TYPE_LSM 29 +#endif + /* * Sleep duration (in seconds) before retrying CPU binding if it fails. * This is used when binding fails and no event-based wakeup is implemented. 
@@ -695,6 +699,20 @@ static struct kfunc *find_kfunc_from_name(struct bpf_tracer *tracer, return NULL; } +static struct lsm_prog *find_lsm_from_name(struct bpf_tracer *tracer, + const char *name) +{ + struct lsm_prog *p; + int i; + for (i = 0; i < PROBES_NUM_MAX; i++) { + p = &tracer->lsms[i]; + if (!strcmp(p->name, name)) + return p; + } + + return NULL; +} + static struct tracepoint *get_tracepoint_from_tracer(struct bpf_tracer *tracer, const char *tp_name) { @@ -747,6 +765,29 @@ static struct kfunc *get_kfunc_from_tracer(struct bpf_tracer *tracer, return p; } +static struct lsm_prog *get_lsm_from_tracer(struct bpf_tracer *tracer, + struct ebpf_prog *prog) +{ + struct lsm_prog *p = find_lsm_from_name(tracer, prog->name); + if (p && p->prog) + return p; + + if (tracer->lsms_count >= PROBES_NUM_MAX) { + ebpf_warning("lsm programs count too many. The maximum is %d\n", + PROBES_NUM_MAX); + return NULL; + } + + int idx = tracer->lsms_count++; + p = &tracer->lsms[idx]; + p->prog_fd = prog->prog_fd; + p->prog = prog; + + snprintf(p->name, sizeof(p->name), "%s", prog->name); + + return p; +} + void add_probe_to_tracer(struct probe *pb) { struct bpf_tracer *tracer = pb->tracer; @@ -1096,6 +1137,86 @@ static int kfunc_detach(struct kfunc *p) return ETR_OK; } +static int lsm_attach(struct lsm_prog *p) +{ + if (p->link) { + return ETR_EXIST; + } + + if (p->prog->prog_fd == 0) + p->prog->prog_fd = load_ebpf_prog(p->prog); + + struct ebpf_link *bl = program__attach_lsm(p->prog); + p->link = bl; + + if (bl == NULL) { + ebpf_warning("program__attach_lsm() failed, name:%s.\n", + p->name); + __sync_fetch_and_add(&attach_failed_count, 1); + return ETR_INVAL; + } + + return ETR_OK; +} + +static int lsm_detach(struct lsm_prog *p) +{ + if (p->link == NULL) { + return ETR_NOTEXIST; + } + + if (p->link->detach) { + p->link->detach(p->link); + } + + free(p->link); + p->link = NULL; + return ETR_OK; +} + +static int lsm_programs_handle(struct bpf_tracer *tracer, int type) +{ + int 
(*lsm_handle) (struct lsm_prog * p) = NULL; + struct lsm_prog *lsm; + struct ebpf_object *obj = tracer->obj; + int i, error; + + if (type == HOOK_ATTACH) + lsm_handle = lsm_attach; + else if (type == HOOK_DETACH) + lsm_handle = lsm_detach; + else + return ETR_INVAL; + + for (i = 0; i < obj->progs_cnt; i++) { + if (obj->progs[i].type != BPF_PROG_TYPE_LSM) + continue; + + lsm = get_lsm_from_tracer(tracer, &obj->progs[i]); + if (!lsm) + continue; + + error = lsm_handle(lsm); + if (type == HOOK_ATTACH && error == ETR_EXIST) + continue; + + if (type == HOOK_DETACH && error == ETR_NOTEXIST) + continue; + + if (error) { + ebpf_warning + ("%s lsm: '%s', failed; enforcement disabled for this hook.", + type == HOOK_ATTACH ? "attach" : "detach", lsm->name); + continue; + } + + ebpf_info("%s lsm: '%s', succeed!", + type == HOOK_ATTACH ? "attach" : "detach", lsm->name); + } + + return ETR_OK; +} + int tracer_hooks_process(struct bpf_tracer *tracer, enum tracer_hook_type type, int *probes_count) { @@ -1165,7 +1286,7 @@ int tracer_hooks_process(struct bpf_tracer *tracer, enum tracer_hook_type type, int i; struct tracer_probes_conf *tps = tracer->tps; if (tps == NULL) - goto perf_event; + goto lsm_programs; for (i = 0; i < tps->tps_nr; i++) { tp = get_tracepoint_from_tracer(tracer, tps->tps[i].name); @@ -1214,6 +1335,9 @@ int tracer_hooks_process(struct bpf_tracer *tracer, enum tracer_hook_type type, kf->name); } +lsm_programs: + lsm_programs_handle(tracer, type); + perf_event: if (!tracer->enable_sample) diff --git a/agent/src/ebpf/user/tracer.h b/agent/src/ebpf/user/tracer.h index 056d1173bdc..098a0f42528 100644 --- a/agent/src/ebpf/user/tracer.h +++ b/agent/src/ebpf/user/tracer.h @@ -328,6 +328,13 @@ struct kfunc { int prog_fd; }; +struct lsm_prog { + char name[PROBE_NAME_SZ]; + struct ebpf_link *link; + struct ebpf_prog *prog; + int prog_fd; +}; + struct queue { int id; // Queue Identifier struct bpf_tracer *t; @@ -410,6 +417,8 @@ struct bpf_tracer { int tracepoints_count; 
struct kfunc kfuncs[PROBES_NUM_MAX]; int kfuncs_count; + struct lsm_prog lsms[PROBES_NUM_MAX]; + int lsms_count; pthread_mutex_t mutex_probes_lock; // Protect the probes operation in multiple threads /* From ca7f3b01a4b7cb6802721345a79fcaf0b399a57e Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 19:02:41 +0800 Subject: [PATCH 09/24] fix(ebpf): keep optional lsm programs non-fatal --- .../src/ebpf/test/test_ai_agent_source_contracts.py | 10 ++++++++++ agent/src/ebpf/user/load.c | 13 +++++++++++++ agent/src/ebpf/user/tracer.c | 13 ++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 37532ad9cab..50ca67be253 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -149,5 +149,15 @@ def read_source(path: Path) -> str: "lsm_programs_handle" in tracer_c_text, "tracer.c must include LSM programs in the attach lifecycle", ) +require( + "new_prog->type == BPF_PROG_TYPE_LSM" in load_text + and "Skip optional BPF LSM program" in load_text, + "load.c must keep unsupported BPF LSM programs non-fatal", +) +require( + "p->prog->prog_fd < 0" in tracer_c_text + and "skip unloaded lsm program" in tracer_c_text, + "tracer.c must skip unloaded optional LSM programs during attach", +) print("[OK]") diff --git a/agent/src/ebpf/user/load.c b/agent/src/ebpf/user/load.c index 1fe6d97ee06..57a73f632e6 100644 --- a/agent/src/ebpf/user/load.c +++ b/agent/src/ebpf/user/load.c @@ -988,6 +988,19 @@ static int load_obj__progs(struct ebpf_object *obj) new_prog->insns_cnt, BPF_MAXINSNS); } + /* + * BPF LSM is an optional enforcement mechanism. Kernels + * without CONFIG_BPF_LSM or an active bpf LSM can reject + * the program before attach, so keep the rest of the + * socket tracer available and let userspace fall back. 
+ */ + if (new_prog->type == BPF_PROG_TYPE_LSM) { + ebpf_warning + ("Skip optional BPF LSM program '%s'; enforcement disabled for this hook.\n", + new_prog->name); + continue; + } + if (memcmp(desc->name, "uprobe/", 7) && memcmp(desc->name, "uretprobe/", 10)) { return ETR_INVAL; diff --git a/agent/src/ebpf/user/tracer.c b/agent/src/ebpf/user/tracer.c index 67d0e464759..b61238594c8 100644 --- a/agent/src/ebpf/user/tracer.c +++ b/agent/src/ebpf/user/tracer.c @@ -1143,8 +1143,19 @@ static int lsm_attach(struct lsm_prog *p) return ETR_EXIST; } - if (p->prog->prog_fd == 0) + if (p->prog->prog_fd < 0) { + ebpf_warning("skip unloaded lsm program, name:%s.\n", p->name); + return ETR_INVAL; + } + + if (p->prog->prog_fd == 0) { p->prog->prog_fd = load_ebpf_prog(p->prog); + if (p->prog->prog_fd < 0) { + ebpf_warning("load lsm program failed, name:%s.\n", + p->name); + return ETR_INVAL; + } + } struct ebpf_link *bl = program__attach_lsm(p->prog); p->link = bl; From 7742c79ce8bdb73c1bc9d11a639a30e3560221bd Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 19:03:02 +0800 Subject: [PATCH 10/24] test(ebpf): assert ai agent exec enforcement contract --- .../test/test_ai_agent_source_contracts.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 50ca67be253..0932dd7c7ac 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -12,6 +12,10 @@ PROBE_C = ROOT / "user" / "probe.c" TRACER_H = ROOT / "user" / "tracer.h" TRACER_C = ROOT / "user" / "tracer.c" +WORKSPACE_ROOT = ROOT.parents[3] +ENTERPRISE_AGENT = WORKSPACE_ROOT / "deepflow-core" / "agent" +ENTERPRISE_BPF = ENTERPRISE_AGENT / "src" / "ebpf" / "user" / "extended" / "bpf" +ENTERPRISE_SUPPORT = ENTERPRISE_AGENT / "scripts" / "support_extended_observability" def require(condition: bool, message: str) -> None: 
@@ -160,4 +164,35 @@ def read_source(path: Path) -> str: "tracer.c must skip unloaded optional LSM programs during attach", ) +if ENTERPRISE_AGENT.exists(): + exec_enforce_bpf = ENTERPRISE_BPF / "ai_agent_exec_enforce.bpf.c" + require( + exec_enforce_bpf.exists(), + f"missing enterprise AI Agent exec enforcement BPF: {exec_enforce_bpf}", + ) + exec_enforce_text = read_source(exec_enforce_bpf) + support_text = read_source(ENTERPRISE_SUPPORT) + + require( + 'SEC("lsm/bprm_check_security")' in exec_enforce_text, + "AI Agent exec enforcement must attach to lsm/bprm_check_security", + ) + require( + "is_ai_agent_process" in exec_enforce_text + or "ai_agent_pids" in exec_enforce_text, + "AI Agent exec enforcement must scope matching to AI Agent processes", + ) + require( + "DATA_SOURCE_PROC_BLOCK_EVENT" in exec_enforce_text, + "AI Agent exec enforcement must emit proc block events", + ) + require( + "ai_agent_submit_event" in exec_enforce_text, + "AI Agent exec enforcement must submit events through the AI Agent pipeline", + ) + require( + "ai_agent_exec_enforce.bpf.c" in support_text, + "support_extended_observability must include ai_agent_exec_enforce.bpf.c", + ) + print("[OK]") From bdff01e646dd503565c1e05632f7a8bc97cfd267 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 12 May 2026 19:28:38 +0800 Subject: [PATCH 11/24] feat(agent): sync ai agent enforcement policy to bpf --- agent/crates/enterprise-utils/src/lib.rs | 25 ++++ agent/src/config/handler.rs | 3 + agent/src/ebpf_dispatcher.rs | 171 ++++++++++++++++++++++- 3 files changed, 198 insertions(+), 1 deletion(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 726ea1e2bf5..806a8ad9b49 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -510,6 +510,16 @@ pub mod ai_agent_enforcement { Block, } + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct ExecRuleInput { + pub id: String, + pub mode: EnforcementMode, 
+ pub exact: Vec, + pub prefix: Vec, + pub suffix: Vec, + pub argv_contains_any: Vec, + } + #[derive(Clone, Debug, PartialEq, Eq)] pub struct PolicyHit { pub rule_index: u32, @@ -526,8 +536,23 @@ pub mod ai_agent_enforcement { pub fn match_exec(&self, _exec_path: &str, _cmdline: &str) -> Option { None } + + pub fn sync_to_bpf_maps( + &self, + _exec_rules_fd: i32, + _policy_epoch_fd: i32, + _max_records: usize, + ) -> Result<(), String> { + Ok(()) + } + } + + pub fn compile_exec_rules(_rules: &[ExecRuleInput]) -> Result { + Ok(CompiledExecPolicy { epoch: 0 }) } + pub fn set_global_exec_policy(_policy: Option) {} + pub fn global_exec_policy() -> Option> { None } diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index ef43f8ecaf1..206f9f3d036 100644 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -1368,6 +1368,7 @@ pub struct EbpfConfig { pub epc_id: u32, pub l7_log_packet_size: usize, pub ai_agent_max_payload_size: usize, + pub ai_agent_enforcement: AiAgentEnforcementConfig, // 静态配置 pub l7_protocol_inference_max_fail_count: usize, pub l7_protocol_inference_ttl: usize, @@ -1394,6 +1395,7 @@ impl fmt::Debug for EbpfConfig { .field("epc_id", &self.epc_id) .field("l7_log_packet_size", &self.l7_log_packet_size) .field("ai_agent_max_payload_size", &self.ai_agent_max_payload_size) + .field("ai_agent_enforcement", &self.ai_agent_enforcement) .field( "l7_protocol_inference_max_fail_count", &self.l7_protocol_inference_max_fail_count, @@ -2469,6 +2471,7 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig { l7_log_packet_size: crate::ebpf::CAP_LEN_MAX .min(conf.processors.request_log.tunning.payload_truncation as usize), ai_agent_max_payload_size: conf.inputs.proc.ai_agent.max_payload_size, + ai_agent_enforcement: conf.inputs.proc.ai_agent.enforcement.clone(), l7_log_tap_types: generate_tap_types_array( &conf.outputs.flow_log.filters.l7_capture_network_types, ), diff --git a/agent/src/ebpf_dispatcher.rs 
b/agent/src/ebpf_dispatcher.rs index 8c7a903c2d6..d89e3799888 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -43,7 +43,7 @@ pub mod memory_profile; use std::ffi::{CStr, CString}; use std::ptr::{self, null_mut}; use std::slice; -use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicI64, AtomicU64, Ordering}; use std::sync::Arc; use std::thread::{self, JoinHandle}; use std::time::Duration; @@ -679,6 +679,65 @@ static mut ON_CPU_PROFILE_FREQUENCY: u32 = 0; static mut PROFILE_STACK_COMPRESSION: bool = true; #[allow(static_mut_refs)] static mut TIME_DIFF: Option> = None; +#[cfg(feature = "enterprise")] +static AI_AGENT_EXEC_RULES_MAP_FD: AtomicI32 = AtomicI32::new(-1); +#[cfg(feature = "enterprise")] +static AI_AGENT_POLICY_EPOCH_MAP_FD: AtomicI32 = AtomicI32::new(-1); +#[cfg(feature = "enterprise")] +const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 256; + +#[cfg(feature = "enterprise")] +fn ai_agent_enforcement_mode_eq(value: &str, expected: &str) -> bool { + value.trim().eq_ignore_ascii_case(expected) +} + +#[cfg(feature = "enterprise")] +fn ai_agent_enforcement_lsm_allowed( + config: &crate::config::config::AiAgentEnforcementConfig, +) -> bool { + let mechanism_allowed = config + .allowed_mechanisms + .iter() + .any(|m| ai_agent_enforcement_mode_eq(m, "lsm")); + let strategy_allows_lsm = matches!( + config.strategy.trim().to_ascii_lowercase().as_str(), + "auto" | "lsm_only" + ); + mechanism_allowed && strategy_allows_lsm +} + +#[cfg(feature = "enterprise")] +fn ai_agent_enforcement_inputs( + config: &crate::config::config::AiAgentEnforcementConfig, + mode: enterprise_utils::ai_agent_enforcement::EnforcementMode, +) -> Vec { + config + .rules + .iter() + .filter(|rule| { + ai_agent_enforcement_mode_eq(&rule.scope, "ai_agent_tree") + && ai_agent_enforcement_mode_eq(&rule.target_type, "exec") + }) + .map(|rule| { + let rule_mode = if mode + == 
enterprise_utils::ai_agent_enforcement::EnforcementMode::Block + && ai_agent_enforcement_mode_eq(&rule.action.action_type, "deny") + { + enterprise_utils::ai_agent_enforcement::EnforcementMode::Block + } else { + enterprise_utils::ai_agent_enforcement::EnforcementMode::AuditOnly + }; + enterprise_utils::ai_agent_enforcement::ExecRuleInput { + id: rule.id.clone(), + mode: rule_mode, + exact: rule.exec.exact.clone(), + prefix: rule.exec.prefix.clone(), + suffix: rule.exec.suffix.clone(), + argv_contains_any: rule.exec.argv_contains_any.clone(), + } + }) + .collect() +} pub unsafe fn string_from_null_terminated_c_str(ptr: *const u8) -> String { CStr::from_ptr(ptr as *const libc::c_char) @@ -1444,6 +1503,28 @@ impl EbpfCollector { } else { warn!("AI Agent: could not find __ai_agent_pids BPF map (fd={}), file I/O monitoring will not work", fd); } + + let exec_rules_fd = unsafe { + ebpf::bpf_table_get_map_fd( + c"socket-trace".as_ptr(), + c"__ai_agent_exec_rules".as_ptr(), + ) + }; + AI_AGENT_EXEC_RULES_MAP_FD.store(exec_rules_fd, Ordering::Relaxed); + let policy_epoch_fd = unsafe { + ebpf::bpf_table_get_map_fd( + c"socket-trace".as_ptr(), + c"__ai_agent_policy_epoch".as_ptr(), + ) + }; + AI_AGENT_POLICY_EPOCH_MAP_FD.store(policy_epoch_fd, Ordering::Relaxed); + if exec_rules_fd < 0 || policy_epoch_fd < 0 { + warn!( + "AI Agent enforcement: BPF maps unavailable (__ai_agent_exec_rules={}, __ai_agent_policy_epoch={}), block mode will downgrade to audit-only", + exec_rules_fd, policy_epoch_fd + ); + } + Self::sync_ai_agent_enforcement_policy(&config.ai_agent_enforcement); } Ok(handle) @@ -1484,6 +1565,92 @@ impl EbpfCollector { } } + #[cfg(feature = "enterprise")] + fn clear_ai_agent_enforcement_bpf_maps(max_records: usize) { + let exec_rules_fd = AI_AGENT_EXEC_RULES_MAP_FD.load(Ordering::Relaxed); + let policy_epoch_fd = AI_AGENT_POLICY_EPOCH_MAP_FD.load(Ordering::Relaxed); + if exec_rules_fd < 0 || policy_epoch_fd < 0 { + return; + } + match 
enterprise_utils::ai_agent_enforcement::compile_exec_rules(&[]) { + Ok(policy) => { + if let Err(e) = policy.sync_to_bpf_maps(exec_rules_fd, policy_epoch_fd, max_records) + { + warn!("AI Agent enforcement: failed to clear BPF maps: {}", e); + } + } + Err(e) => warn!("AI Agent enforcement: failed to build empty policy: {}", e), + } + } + + #[cfg(feature = "enterprise")] + fn sync_ai_agent_enforcement_policy(config: &crate::config::config::AiAgentEnforcementConfig) { + use enterprise_utils::ai_agent_enforcement::{ + compile_exec_rules, set_global_exec_policy, EnforcementMode, + }; + + let max_records = config.max_rules.min(AI_AGENT_EXEC_RULES_BPF_MAX); + if !config.enabled { + set_global_exec_policy(None); + Self::clear_ai_agent_enforcement_bpf_maps(max_records); + return; + } + + let exec_rules_fd = AI_AGENT_EXEC_RULES_MAP_FD.load(Ordering::Relaxed); + let policy_epoch_fd = AI_AGENT_POLICY_EPOCH_MAP_FD.load(Ordering::Relaxed); + let bpf_maps_available = exec_rules_fd >= 0 && policy_epoch_fd >= 0; + let lsm_allowed = ai_agent_enforcement_lsm_allowed(config); + let requested_block = ai_agent_enforcement_mode_eq(&config.mode, "block"); + let effective_mode = if requested_block && bpf_maps_available && lsm_allowed { + EnforcementMode::Block + } else { + if requested_block { + warn!( + "AI Agent enforcement: block mode requested but BPF LSM is unavailable or disallowed; downgrade to audit-only (maps_available={}, lsm_allowed={})", + bpf_maps_available, lsm_allowed + ); + } + EnforcementMode::AuditOnly + }; + + let inputs = ai_agent_enforcement_inputs(config, effective_mode); + let policy = match compile_exec_rules(&inputs) { + Ok(policy) => policy, + Err(e) => { + warn!("AI Agent enforcement: failed to compile policy: {}", e); + set_global_exec_policy(None); + Self::clear_ai_agent_enforcement_bpf_maps(max_records); + return; + } + }; + + if effective_mode == EnforcementMode::Block { + if let Err(e) = policy.sync_to_bpf_maps(exec_rules_fd, policy_epoch_fd, max_records) { 
+ warn!( + "AI Agent enforcement: failed to sync BPF policy, downgrade to audit-only: {}", + e + ); + let audit_inputs = ai_agent_enforcement_inputs(config, EnforcementMode::AuditOnly); + match compile_exec_rules(&audit_inputs) { + Ok(audit_policy) => set_global_exec_policy(Some(audit_policy)), + Err(e) => { + warn!( + "AI Agent enforcement: failed to compile audit policy: {}", + e + ); + set_global_exec_policy(None); + } + } + Self::clear_ai_agent_enforcement_bpf_maps(max_records); + return; + } + } else { + Self::clear_ai_agent_enforcement_bpf_maps(max_records); + } + + set_global_exec_policy(Some(policy)); + } + fn ebpf_start() { debug!("ebpf collector starting ebpf-kernel."); unsafe { @@ -1693,6 +1860,8 @@ impl EbpfCollector { config.l7_log_packet_size, config.ai_agent_max_payload_size, ); + #[cfg(feature = "enterprise")] + Self::sync_ai_agent_enforcement_policy(&config.ai_agent_enforcement); #[cfg(feature = "extended_observability")] { From 7eebc06014d653da567a8542407944568a5ce9f7 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 13 May 2026 14:05:42 +0800 Subject: [PATCH 12/24] fix(ebpf): harden ai agent exec lsm enforcement --- agent/src/common/kernel_capability.rs | 90 ++++++++++++++++--- .../test/test_ai_agent_exec_enforcement.sh | 55 ++++++++++++ .../test/test_ai_agent_source_contracts.py | 31 +++++++ agent/src/ebpf/user/load.c | 37 ++++++-- agent/src/ebpf/user/probe.c | 6 +- agent/src/ebpf_dispatcher.rs | 6 +- 6 files changed, 201 insertions(+), 24 deletions(-) create mode 100644 agent/src/ebpf/test/test_ai_agent_exec_enforcement.sh diff --git a/agent/src/common/kernel_capability.rs b/agent/src/common/kernel_capability.rs index d7eeb35df84..cdab440b93c 100644 --- a/agent/src/common/kernel_capability.rs +++ b/agent/src/common/kernel_capability.rs @@ -1,7 +1,7 @@ use std::{ - fs, + env, fs, io::{Cursor, Read}, - path::Path, + path::{Path, PathBuf}, }; use flate2::read::GzDecoder; @@ -17,18 +17,28 @@ pub struct KernelCapability { impl KernelCapability { 
pub fn detect() -> Self { - let lsm_text = fs::read_to_string("/sys/kernel/security/lsm").unwrap_or_default(); - let config_text = read_kernel_config().unwrap_or_default(); + let proc_root = path_from_env("PROCFS_ROOT", "/proc"); + let sys_root = path_from_env("SYSFS_ROOT", "/sys"); + let boot_root = + host_sibling_root(&proc_root, "boot").unwrap_or_else(|| PathBuf::from("/boot")); + + Self::detect_from_roots(&proc_root, &sys_root, &boot_root) + } + + pub fn detect_from_roots(proc_root: &Path, sys_root: &Path, boot_root: &Path) -> Self { + let lsm_text = fs::read_to_string(sys_root.join("kernel/security/lsm")).unwrap_or_default(); + let config_text = read_kernel_config_from_roots(proc_root, boot_root).unwrap_or_default(); + let bpf_lsm_active = lsm_has_bpf(&lsm_text); Self { - bpf_lsm_configured: config_enabled(&config_text, "CONFIG_BPF_LSM"), - bpf_lsm_active: lsm_has_bpf(&lsm_text), + bpf_lsm_configured: config_enabled(&config_text, "CONFIG_BPF_LSM") || bpf_lsm_active, + bpf_lsm_active, bpf_kprobe_override_configured: config_enabled( &config_text, "CONFIG_BPF_KPROBE_OVERRIDE", ), seccomp_filter_configured: config_enabled(&config_text, "CONFIG_SECCOMP_FILTER"), - btf_vmlinux_available: Path::new("/sys/kernel/btf/vmlinux").exists(), + btf_vmlinux_available: sys_root.join("kernel/btf/vmlinux").exists(), } } @@ -37,6 +47,18 @@ impl KernelCapability { } } +fn path_from_env(name: &str, default: &str) -> PathBuf { + env::var_os(name) + .filter(|value| !value.is_empty()) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from(default)) +} + +fn host_sibling_root(proc_root: &Path, sibling: &str) -> Option { + let parent = proc_root.parent()?; + Some(parent.join(sibling)) +} + fn lsm_has_bpf(lsm_text: &str) -> bool { lsm_text .trim() @@ -54,20 +76,24 @@ fn config_enabled(config_text: &str, option: &str) -> bool { } fn read_kernel_config() -> Option { - if let Some(config) = read_boot_kernel_config() { + read_kernel_config_from_roots(Path::new("/proc"), Path::new("/boot")) 
+} + +fn read_kernel_config_from_roots(proc_root: &Path, boot_root: &Path) -> Option { + if let Some(config) = read_boot_kernel_config(proc_root, boot_root) { return Some(config); } - read_proc_kernel_config() + read_proc_kernel_config(proc_root) } -fn read_boot_kernel_config() -> Option { - let release = fs::read_to_string("/proc/sys/kernel/osrelease").ok()?; - let path = format!("/boot/config-{}", release.trim()); +fn read_boot_kernel_config(proc_root: &Path, boot_root: &Path) -> Option { + let release = fs::read_to_string(proc_root.join("sys/kernel/osrelease")).ok()?; + let path = boot_root.join(format!("config-{}", release.trim())); fs::read_to_string(path).ok() } -fn read_proc_kernel_config() -> Option { - let compressed = fs::read("/proc/config.gz").ok()?; +fn read_proc_kernel_config(proc_root: &Path) -> Option { + let compressed = fs::read(proc_root.join("config.gz")).ok()?; decode_gzip(&compressed).ok() } @@ -113,4 +139,40 @@ mod tests { } .supports_exec_lsm_enforcement()); } + + #[test] + fn detect_from_roots_reads_host_sysfs_lsm_in_container() { + let root = make_temp_root("host-sysfs-lsm"); + let proc_root = root.join("host-proc"); + let sys_root = root.join("host-sys"); + let boot_root = root.join("boot"); + fs::create_dir_all(sys_root.join("kernel/security")).unwrap(); + fs::create_dir_all(sys_root.join("kernel/btf")).unwrap(); + fs::create_dir_all(proc_root.join("sys/kernel")).unwrap(); + fs::write( + sys_root.join("kernel/security/lsm"), + "capability,yama,selinux,bpf", + ) + .unwrap(); + fs::write(sys_root.join("kernel/btf/vmlinux"), b"btf").unwrap(); + fs::write(proc_root.join("sys/kernel/osrelease"), "4.18.0-test\n").unwrap(); + + let capability = KernelCapability::detect_from_roots(&proc_root, &sys_root, &boot_root); + + assert!(capability.bpf_lsm_active); + assert!(capability.bpf_lsm_configured); + assert!(capability.btf_vmlinux_available); + + let _ = fs::remove_dir_all(root); + } + + fn make_temp_root(name: &str) -> std::path::PathBuf { + let 
root = std::env::temp_dir().join(format!( + "deepflow-kernel-capability-{name}-{}", + std::process::id() + )); + let _ = fs::remove_dir_all(&root); + fs::create_dir_all(&root).unwrap(); + root + } } diff --git a/agent/src/ebpf/test/test_ai_agent_exec_enforcement.sh b/agent/src/ebpf/test/test_ai_agent_exec_enforcement.sh new file mode 100644 index 00000000000..e3ed0c8f7f9 --- /dev/null +++ b/agent/src/ebpf/test/test_ai_agent_exec_enforcement.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Lightweight manual harness for AI Agent exec enforcement verification. +# It intentionally does not start or reconfigure deepflow-agent; effective policy +# must be delivered by DeepFlow server/controller agent-group configuration. + +SYSFS_ROOT="${SYSFS_ROOT:-/sys}" +LSM_FILE="${SYSFS_ROOT%/}/kernel/security/lsm" +BLOCKED_CMD="${BLOCKED_CMD:-/usr/bin/uname}" +SERVER_NODE="${SERVER_NODE:-10.50.120.81}" +AGENT_NODE="${AGENT_NODE:-10.50.120.21}" +NAMESPACE="${NAMESPACE:-deepflow}" +AGENT_DS="${AGENT_DS:-deepflow-agent-r4-dcn-ctrl}" + +if [[ ! -r "$LSM_FILE" ]]; then + echo "SKIP: cannot read $LSM_FILE" + exit 0 +fi + +if ! tr ',' '\n' <"$LSM_FILE" | grep -qx bpf; then + echo "SKIP: BPF LSM is not active in $LSM_FILE" + exit 0 +fi + +if [[ ! -x "$BLOCKED_CMD" ]]; then + echo "SKIP: blocked command $BLOCKED_CMD is not executable on this host" + exit 0 +fi + +echo "OK: BPF LSM active and $BLOCKED_CMD exists." +cat < 2>&1 | grep -Ei "KernelCapability|bpf_lsm|attach lsm" | tail -50' + +4. Trigger an AI endpoint hit from the same process that later executes + $BLOCKED_CMD. Expected result in block mode: + PermissionError errno=1 + +5. If event persistence is being validated, deploy a server/schema version that + contains event.proc_block_event before querying ClickHouse. 
+EOF diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 0932dd7c7ac..72185add84c 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -137,6 +137,12 @@ def read_source(path: Path) -> str: "BPF_PROG_TYPE_LSM" in load_text, "load.c must map lsm/ programs to BPF_PROG_TYPE_LSM", ) +require( + "prog_load_name" in load_text + and "prog->type == BPF_PROG_TYPE_LSM" in load_text + and '"lsm__%s"' in load_text, + "load.c must pass lsm__ to BCC so it sets BPF_LSM_MAC and lets libbpf find bpf_lsm_", +) require( "program__attach_lsm" in probe_text, "probe.c must provide an LSM attach helper", @@ -145,6 +151,10 @@ def read_source(path: Path) -> str: "bpf_raw_tracepoint_open" in probe_text, "LSM attach helper must use the raw tracepoint attach syscall path", ) +require( + "bpf_raw_tracepoint_open(NULL, ebpf_prog->prog_fd)" in probe_text, + "LSM attach helper must attach by loaded attach_btf_id, not by raw tracepoint hook name", +) require( "struct lsm_prog" in tracer_h_text and "lsms_count" in tracer_h_text, "tracer.h must keep LSM program attach state", @@ -177,6 +187,10 @@ def read_source(path: Path) -> str: 'SEC("lsm/bprm_check_security")' in exec_enforce_text, "AI Agent exec enforcement must attach to lsm/bprm_check_security", ) + require( + "BPF_PROG(bpf_lsm_bprm_check_security," in exec_enforce_text, + "AI Agent exec enforcement BPF function name must match the bpf_lsm_ BTF name for BCC/libbpf lookup", + ) require( "is_ai_agent_process" in exec_enforce_text or "ai_agent_pids" in exec_enforce_text, @@ -186,6 +200,23 @@ def read_source(path: Path) -> str: "DATA_SOURCE_PROC_BLOCK_EVENT" in exec_enforce_text, "AI Agent exec enforcement must emit proc block events", ) + require( + "#define AI_AGENT_EXEC_MAX_RULES 8" in exec_enforce_text, + "AI Agent exec enforcement must cap BPF-side rule scan to 8 records to stay under old 
verifier complexity limits", + ) + require( + "ai_agent_match_contains" not in exec_enforce_text + and "AI_AGENT_EXEC_MATCH_ARGV_CONTAINS" not in exec_enforce_text, + "AI Agent exec enforcement BPF must not include argv_contains nested scans on old verifier kernels", + ) + require( + "pattern_hash" in exec_enforce_text + and "ai_agent_hash_exec_path" in exec_enforce_text + and "ai_agent_match_exact" not in exec_enforce_text + and "ai_agent_match_prefix" not in exec_enforce_text + and "ai_agent_match_suffix" not in exec_enforce_text, + "AI Agent exec enforcement BPF must use precomputed exact path hashes instead of verifier-expensive string scans", + ) require( "ai_agent_submit_event" in exec_enforce_text, "AI Agent exec enforcement must submit events through the AI Agent pipeline", diff --git a/agent/src/ebpf/user/load.c b/agent/src/ebpf/user/load.c index 57a73f632e6..98011f97a11 100644 --- a/agent/src/ebpf/user/load.c +++ b/agent/src/ebpf/user/load.c @@ -74,6 +74,23 @@ extern int btf__set_pointer_size(struct btf *btf, size_t ptr_sz); static int probe_read_kernel_feat; +static const char *prog_load_name(const struct ebpf_prog *prog, char *buf, + size_t buf_len) +{ + if (prog->type == BPF_PROG_TYPE_LSM && prog->sec_name != NULL && + !strncmp(prog->sec_name, "lsm/", 4) && prog->sec_name[4] != '\0') { + /* + * BCC uses the lsm__ prefix to set expected_attach_type to + * BPF_LSM_MAC. libbpf then adds the kernel BTF bpf_lsm_ + * prefix while resolving attach_btf_id. 
+ */ + snprintf(buf, buf_len, "lsm__%s", prog->sec_name + 4); + return buf; + } + + return prog->name; +} + int suspend_stderr() { fflush(stderr); @@ -261,7 +278,10 @@ static void log_verifier_tail(const char *buf, size_t len) int load_ebpf_prog(struct ebpf_prog *prog) { - return bcc_prog_load(prog->type, prog->name, + char name_buf[128]; + const char *name = prog_load_name(prog, name_buf, sizeof(name_buf)); + + return bcc_prog_load(prog->type, name, prog->insns, prog->insns_size, prog->obj->license, prog->obj->kern_version, 0, NULL, 0 /*EBPF_LOG_LEVEL, log_buf, LOG_BUF_SZ */ ); @@ -776,12 +796,15 @@ static int load_obj__progs(struct ebpf_object *obj) // Modify eBPF instructions based on BTF relocation information. obj_relocate_core(new_prog); + char name_buf[128]; + const char *name = + prog_load_name(new_prog, name_buf, sizeof(name_buf)); int stderr_fd = suspend_stderr(); if (stderr_fd < 0) { ebpf_warning("Failed to suspend stderr\n"); } new_prog->prog_fd = - bcc_prog_load(new_prog->type, new_prog->name, + bcc_prog_load(new_prog->type, name, new_prog->insns, new_prog->insns_size, obj->license, obj->kern_version, 0, NULL, 0 /*EBPF_LOG_LEVEL, log_buf, LOG_BUF_SZ */ ); @@ -792,7 +815,7 @@ static int load_obj__progs(struct ebpf_object *obj) bool save_full_log = env_flag_enabled(VERIFIER_LOG_ENV); ebpf_warning ("bcc_prog_load() failed. 
name: %s, %s errno: %d\n", - new_prog->name, strerror(errno), errno); + name, strerror(errno), errno); char log_path[] = "/tmp/df_verifier_XXXXXX.log"; int tmp_fd = -1; char tail_buf[VERIFIER_LOG_TAIL_BYTES + 1] = { 0 }; @@ -858,7 +881,7 @@ static int load_obj__progs(struct ebpf_object *obj) log_pipe[1] = -1; fd2 = bcc_prog_load(new_prog->type, - new_prog->name, + name, new_prog->insns, new_prog->insns_size, obj->license, @@ -912,7 +935,7 @@ static int load_obj__progs(struct ebpf_object *obj) } fd2 = bcc_prog_load(new_prog->type, - new_prog->name, + name, new_prog->insns, new_prog->insns_size, obj->license, @@ -936,7 +959,7 @@ static int load_obj__progs(struct ebpf_object *obj) if (!load_attempted) { fd2 = bcc_prog_load(new_prog->type, - new_prog->name, + name, new_prog->insns, new_prog->insns_size, obj->license, @@ -972,7 +995,7 @@ static int load_obj__progs(struct ebpf_object *obj) // Preserve errno from the latest attempt for better diagnostics. ebpf_warning ("bcc_prog_load() still failed. name: %s, errno after retry: %d (orig %d)\n", - new_prog->name, retry_errno, saved_errno); + name, retry_errno, saved_errno); errno = retry_errno; } diff --git a/agent/src/ebpf/user/probe.c b/agent/src/ebpf/user/probe.c index 5f7e39f4e59..14787e1d773 100644 --- a/agent/src/ebpf/user/probe.c +++ b/agent/src/ebpf/user/probe.c @@ -511,7 +511,11 @@ struct ebpf_link *program__attach_lsm(void *prog) return NULL; } - pfd = bpf_raw_tracepoint_open(hook, ebpf_prog->prog_fd); + /* + * BPF LSM programs are loaded with attach_btf_id. The raw tracepoint + * attach syscall should use a NULL name and attach by that BTF id. 
+ */ + pfd = bpf_raw_tracepoint_open(NULL, ebpf_prog->prog_fd); if (pfd < 0) { if (errno == EOPNOTSUPP || errno == EINVAL) { ebpf_warning("BPF LSM attach unsupported for %s: %s(%d)\n", diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index d89e3799888..db355b476a8 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -43,7 +43,9 @@ pub mod memory_profile; use std::ffi::{CStr, CString}; use std::ptr::{self, null_mut}; use std::slice; -use std::sync::atomic::{AtomicBool, AtomicI32, AtomicI64, AtomicU64, Ordering}; +#[cfg(feature = "enterprise")] +use std::sync::atomic::AtomicI32; +use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; use std::sync::Arc; use std::thread::{self, JoinHandle}; use std::time::Duration; @@ -684,7 +686,7 @@ static AI_AGENT_EXEC_RULES_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] static AI_AGENT_POLICY_EPOCH_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] -const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 256; +const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 8; #[cfg(feature = "enterprise")] fn ai_agent_enforcement_mode_eq(value: &str, expected: &str) -> bool { From ceeaeca90e9c0382d15f0ed380c42ead6862273e Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 13 May 2026 19:15:40 +0800 Subject: [PATCH 13/24] feat(agent): support ai agent syscall override enforcement --- agent/crates/enterprise-utils/src/lib.rs | 66 ++++++ agent/src/common/kernel_capability.rs | 86 ++++++- agent/src/common/proc_event/linux.rs | 31 +++ agent/src/config/config.rs | 9 + agent/src/ebpf/kernel/include/bpf_base.h | 9 + .../test/test_ai_agent_source_contracts.py | 32 +++ agent/src/ebpf/user/load.c | 14 ++ agent/src/ebpf/user/tracer.c | 137 ++++++++++++ agent/src/ebpf/user/tracer.h | 10 + agent/src/ebpf_dispatcher.rs | 211 ++++++++++++++++-- server/agent_config/README-CH.md | 8 +- server/agent_config/README.md | 8 +- server/agent_config/template.yaml | 16 +- 13 files 
changed, 599 insertions(+), 38 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 806a8ad9b49..4acef7b21a2 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -520,6 +520,14 @@ pub mod ai_agent_enforcement { pub argv_contains_any: Vec, } + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct SyscallRuleInput { + pub id: String, + pub mode: EnforcementMode, + pub names: Vec, + pub symbols: Vec, + } + #[derive(Clone, Debug, PartialEq, Eq)] pub struct PolicyHit { pub rule_index: u32, @@ -532,6 +540,11 @@ pub mod ai_agent_enforcement { pub epoch: u64, } + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct CompiledSyscallPolicy { + pub epoch: u64, + } + impl CompiledExecPolicy { pub fn match_exec(&self, _exec_path: &str, _cmdline: &str) -> Option { None @@ -547,10 +560,63 @@ pub mod ai_agent_enforcement { } } + impl CompiledSyscallPolicy { + pub fn to_bpf_records(&self) -> Vec { + vec![] + } + + pub fn sync_to_bpf_maps( + &self, + _syscall_rules_fd: i32, + _policy_epoch_fd: i32, + _max_records: usize, + ) -> Result<(), String> { + Ok(()) + } + } + + #[repr(C)] + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct BpfSyscallRuleRecord { + pub rule_index: u32, + pub mode: u8, + pub syscall_key: u8, + pub reserved: u16, + pub syscall_id: u32, + pub errno_code: i32, + pub rule_id: [u8; 64], + pub syscall_name: [u8; 32], + } + + impl Default for BpfSyscallRuleRecord { + fn default() -> Self { + Self { + rule_index: 0, + mode: 0, + syscall_key: 0, + reserved: 0, + syscall_id: 0, + errno_code: 0, + rule_id: [0; 64], + syscall_name: [0; 32], + } + } + } + pub fn compile_exec_rules(_rules: &[ExecRuleInput]) -> Result { Ok(CompiledExecPolicy { epoch: 0 }) } + pub fn compile_syscall_rules( + _rules: &[SyscallRuleInput], + ) -> Result { + Ok(CompiledSyscallPolicy { epoch: 0 }) + } + + pub fn syscall_override_symbols(_syscall_key: u8) -> &'static [&'static str] 
{ + &[] + } + pub fn set_global_exec_policy(_policy: Option) {} pub fn global_exec_policy() -> Option> { diff --git a/agent/src/common/kernel_capability.rs b/agent/src/common/kernel_capability.rs index cdab440b93c..1bec5e1118d 100644 --- a/agent/src/common/kernel_capability.rs +++ b/agent/src/common/kernel_capability.rs @@ -11,6 +11,8 @@ pub struct KernelCapability { pub bpf_lsm_configured: bool, pub bpf_lsm_active: bool, pub bpf_kprobe_override_configured: bool, + pub bpf_kprobe_override_available: bool, + pub bpf_kprobe_override_symbols: Vec, pub seccomp_filter_configured: bool, pub btf_vmlinux_available: bool, } @@ -29,14 +31,18 @@ impl KernelCapability { let lsm_text = fs::read_to_string(sys_root.join("kernel/security/lsm")).unwrap_or_default(); let config_text = read_kernel_config_from_roots(proc_root, boot_root).unwrap_or_default(); let bpf_lsm_active = lsm_has_bpf(&lsm_text); + let bpf_kprobe_override_symbols = read_kprobe_override_symbols(sys_root); + let bpf_kprobe_override_configured = + config_enabled(&config_text, "CONFIG_BPF_KPROBE_OVERRIDE") + || !bpf_kprobe_override_symbols.is_empty(); Self { bpf_lsm_configured: config_enabled(&config_text, "CONFIG_BPF_LSM") || bpf_lsm_active, bpf_lsm_active, - bpf_kprobe_override_configured: config_enabled( - &config_text, - "CONFIG_BPF_KPROBE_OVERRIDE", - ), + bpf_kprobe_override_configured, + bpf_kprobe_override_available: bpf_kprobe_override_configured + && !bpf_kprobe_override_symbols.is_empty(), + bpf_kprobe_override_symbols, seccomp_filter_configured: config_enabled(&config_text, "CONFIG_SECCOMP_FILTER"), btf_vmlinux_available: sys_root.join("kernel/btf/vmlinux").exists(), } @@ -45,6 +51,14 @@ impl KernelCapability { pub fn supports_exec_lsm_enforcement(&self) -> bool { self.bpf_lsm_configured && self.bpf_lsm_active } + + pub fn supports_kprobe_override_symbol(&self, symbol: &str) -> bool { + self.bpf_kprobe_override_available + && self + .bpf_kprobe_override_symbols + .iter() + .any(|allowed| allowed == 
symbol) + } } fn path_from_env(name: &str, default: &str) -> PathBuf { @@ -97,6 +111,37 @@ fn read_proc_kernel_config(proc_root: &Path) -> Option { decode_gzip(&compressed).ok() } +fn read_kprobe_override_symbols(sys_root: &Path) -> Vec { + const REL_PATHS: [&str; 2] = [ + "kernel/debug/error_injection/list", + "kernel/debug/fail_function/injectable", + ]; + + let mut symbols = Vec::new(); + for rel_path in REL_PATHS { + let Ok(text) = fs::read_to_string(sys_root.join(rel_path)) else { + continue; + }; + symbols.extend(parse_error_injection_symbols(&text)); + } + symbols.sort(); + symbols.dedup(); + symbols +} + +fn parse_error_injection_symbols(text: &str) -> Vec { + text.lines() + .filter_map(|line| { + let token = line.split_whitespace().next().unwrap_or_default().trim(); + if token.is_empty() || token.starts_with('#') { + None + } else { + Some(token.to_string()) + } + }) + .collect() +} + fn decode_gzip(bytes: &[u8]) -> Result { let mut decoder = GzDecoder::new(Cursor::new(bytes)); let mut output = String::new(); @@ -123,6 +168,14 @@ mod tests { )); } + #[test] + fn parse_error_injection_list_takes_first_column() { + assert_eq!( + parse_error_injection_symbols("__x64_sys_reboot\tEI_ETYPE_ERRNO\n# ignored\n"), + vec!["__x64_sys_reboot".to_string()] + ); + } + #[test] fn support_exec_lsm_requires_config_and_active_lsm() { assert!(KernelCapability { @@ -166,6 +219,31 @@ mod tests { let _ = fs::remove_dir_all(root); } + #[test] + fn detect_from_roots_uses_error_injection_allowlist_for_kprobe_override() { + let root = make_temp_root("kprobe-override-allowlist"); + let proc_root = root.join("host-proc"); + let sys_root = root.join("host-sys"); + let boot_root = root.join("boot"); + fs::create_dir_all(proc_root.join("sys/kernel")).unwrap(); + fs::create_dir_all(sys_root.join("kernel/debug/error_injection")).unwrap(); + fs::write(proc_root.join("sys/kernel/osrelease"), "4.18.0-test\n").unwrap(); + fs::write( + sys_root.join("kernel/debug/error_injection/list"), + 
"__x64_sys_reboot\n__x64_sys_init_module\n", + ) + .unwrap(); + + let capability = KernelCapability::detect_from_roots(&proc_root, &sys_root, &boot_root); + + assert!(capability.bpf_kprobe_override_configured); + assert!(capability.bpf_kprobe_override_available); + assert!(capability.supports_kprobe_override_symbol("__x64_sys_reboot")); + assert!(!capability.supports_kprobe_override_symbol("__x64_sys_mount")); + + let _ = fs::remove_dir_all(root); + } + fn make_temp_root(name: &str) -> std::path::PathBuf { let root = std::env::temp_dir().join(format!( "deepflow-kernel-capability-{name}-{}", diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index c0754f780d6..98d46aa7179 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -952,6 +952,37 @@ mod tests { assert_eq!(pb.mechanism, "lsm"); } + #[test] + fn test_proc_block_event_into_metric_carries_syscall_override() { + let raw = make_proc_block_raw( + 2, + 2, + 2, + 1, + 1, + 13, + 100, + 10, + 1000, + 1000, + 169, + 42, + b"block-direct-reboot", + b"reboot", + b"", + b"reboot", + ); + let event = ProcBlockEventData::try_from(raw.as_slice()).unwrap(); + let pb: metric::ProcBlockEventData = event.into(); + assert_eq!( + pb.target_type, + metric::EnforcementTargetType::EnforcementTargetSyscall as i32 + ); + assert_eq!(pb.mechanism, "kprobe_override"); + assert_eq!(pb.syscall_name, "reboot"); + assert_eq!(pb.syscall_id, 169); + } + #[test] fn test_new_proc_block_event_for_audit_encodes_proc_block_event() { let proc_event = ProcEvent { diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 64ca04bfd72..351426e9872 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -668,6 +668,7 @@ pub struct AiAgentEnforcementRule { pub action: AiAgentEnforcementAction, pub audit: bool, pub exec: AiAgentExecMatch, + pub syscall: AiAgentSyscallMatch, } impl Default for AiAgentEnforcementRule { @@ -680,6 +681,7 @@ 
impl Default for AiAgentEnforcementRule { action: AiAgentEnforcementAction::default(), audit: true, exec: AiAgentExecMatch::default(), + syscall: AiAgentSyscallMatch::default(), } } } @@ -710,6 +712,13 @@ pub struct AiAgentExecMatch { pub argv_contains_any: Vec, } +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentSyscallMatch { + pub names: Vec, + pub symbols: Vec, +} + #[derive(Clone, Debug, Deserialize, PartialEq, Eq)] #[serde(default)] pub struct Proc { diff --git a/agent/src/ebpf/kernel/include/bpf_base.h b/agent/src/ebpf/kernel/include/bpf_base.h index d2f3d05a617..75ebedbe769 100644 --- a/agent/src/ebpf/kernel/include/bpf_base.h +++ b/agent/src/ebpf/kernel/include/bpf_base.h @@ -41,6 +41,10 @@ struct task_struct; // Helper ID for bpf_get_current_task_btf (introduced in Linux 5.11). #define BPF_FUNC_get_current_task_btf 158 #endif +#ifndef BPF_FUNC_override_return +// Helper ID for bpf_override_return (kprobe override / error injection). +#define BPF_FUNC_override_return 58 +#endif /* * bpf helpers @@ -148,6 +152,11 @@ static int static int __attribute__ ((__unused__)) (*bpf_get_stack) (void *ctx, void *buf, __u32 size, int flags) = (void *)67; +static long + __attribute__ ((__unused__)) (*bpf_override_return) (struct pt_regs *regs, + __u64 rc) = + (void *)BPF_FUNC_override_return; +#define DF_BPF_OVERRIDE_RETURN_HELPER_DECLARED 1 // Linux 4.14: Added support for BPF_MAP_TYPE_CPUMAP, allowing packets to be redirected to specific CPUs. 
diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 72185add84c..ebaaa3d0244 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -163,6 +163,10 @@ def read_source(path: Path) -> str: "lsm_programs_handle" in tracer_c_text, "tracer.c must include LSM programs in the attach lifecycle", ) +require( + "optional_kprobe_programs_handle" in tracer_c_text, + "tracer.c must include optional AI Agent kprobe programs in the attach lifecycle", +) require( "new_prog->type == BPF_PROG_TYPE_LSM" in load_text and "Skip optional BPF LSM program" in load_text, @@ -225,5 +229,33 @@ def read_source(path: Path) -> str: "ai_agent_exec_enforce.bpf.c" in support_text, "support_extended_observability must include ai_agent_exec_enforce.bpf.c", ) + syscall_override_bpf = ENTERPRISE_BPF / "ai_agent_syscall_override.bpf.c" + require( + syscall_override_bpf.exists(), + f"missing enterprise AI Agent syscall override BPF: {syscall_override_bpf}", + ) + syscall_override_text = read_source(syscall_override_bpf) + require( + "bpf_override_return(ctx," in syscall_override_text, + "AI Agent syscall enforcement must use bpf_override_return for blocking", + ) + require( + 'SEC("kprobe/__x64_sys_reboot")' in syscall_override_text, + "AI Agent syscall enforcement must hook direct reboot syscall with kprobe override", + ) + require( + "df_K_ai_agent_syscall_override_" in tracer_c_text + or "optional kprobe: 'kprobe/__x64_sys_reboot'" in tracer_c_text, + "tracer.c must explicitly attach AI Agent syscall override kprobes", + ) + require( + "ai_agent_syscall_override.bpf.c" in support_text, + "support_extended_observability must include ai_agent_syscall_override.bpf.c", + ) + require( + "df_K_ai_agent_syscall_override_" in load_text + and "Skip optional AI Agent kprobe override program" in load_text, + "load.c must keep unsupported AI Agent kprobe override 
programs non-fatal", + ) print("[OK]") diff --git a/agent/src/ebpf/user/load.c b/agent/src/ebpf/user/load.c index 98011f97a11..fb8f56e4ace 100644 --- a/agent/src/ebpf/user/load.c +++ b/agent/src/ebpf/user/load.c @@ -665,6 +665,13 @@ static enum bpf_prog_type get_prog_type(struct sec_desc *desc) return prog_type; } +static bool is_optional_ai_agent_kprobe_override_prog(struct ebpf_prog *prog) +{ + return prog != NULL && prog->type == BPF_PROG_TYPE_KPROBE && + prog->name != NULL && + strstr(prog->name, "df_K_ai_agent_syscall_override_") == prog->name; +} + static int load_obj__progs(struct ebpf_object *obj) { int i; @@ -1024,6 +1031,13 @@ static int load_obj__progs(struct ebpf_object *obj) continue; } + if (is_optional_ai_agent_kprobe_override_prog(new_prog)) { + ebpf_warning + ("Skip optional AI Agent kprobe override program '%s'; syscall enforcement disabled for this hook.\n", + new_prog->name); + continue; + } + if (memcmp(desc->name, "uprobe/", 7) && memcmp(desc->name, "uretprobe/", 10)) { return ETR_INVAL; diff --git a/agent/src/ebpf/user/tracer.c b/agent/src/ebpf/user/tracer.c index b61238594c8..b5185530dda 100644 --- a/agent/src/ebpf/user/tracer.c +++ b/agent/src/ebpf/user/tracer.c @@ -713,6 +713,29 @@ static struct lsm_prog *find_lsm_from_name(struct bpf_tracer *tracer, return NULL; } +static bool is_optional_ai_agent_kprobe_prog(struct ebpf_prog *prog) +{ + return prog != NULL && prog->type == BPF_PROG_TYPE_KPROBE && + prog->name != NULL && + strstr(prog->name, "df_K_ai_agent_syscall_override_") == prog->name && + prog->sec_name != NULL && + !strncmp(prog->sec_name, "kprobe/", 7); +} + +static struct optional_kprobe_prog * +find_optional_kprobe_from_name(struct bpf_tracer *tracer, const char *name) +{ + struct optional_kprobe_prog *p; + int i; + for (i = 0; i < PROBES_NUM_MAX; i++) { + p = &tracer->optional_kprobes[i]; + if (!strcmp(p->name, name)) + return p; + } + + return NULL; +} + static struct tracepoint *get_tracepoint_from_tracer(struct bpf_tracer 
*tracer, const char *tp_name) { @@ -788,6 +811,32 @@ static struct lsm_prog *get_lsm_from_tracer(struct bpf_tracer *tracer, return p; } +static struct optional_kprobe_prog * +get_optional_kprobe_from_tracer(struct bpf_tracer *tracer, + struct ebpf_prog *prog) +{ + struct optional_kprobe_prog *p = + find_optional_kprobe_from_name(tracer, prog->name); + if (p && p->prog) + return p; + + if (tracer->optional_kprobes_count >= PROBES_NUM_MAX) { + ebpf_warning("optional kprobe programs count too many. The maximum is %d\n", + PROBES_NUM_MAX); + return NULL; + } + + int idx = tracer->optional_kprobes_count++; + p = &tracer->optional_kprobes[idx]; + p->prog_fd = prog->prog_fd; + p->prog = prog; + p->isret = !strncmp(prog->sec_name, "kretprobe/", 10); + + snprintf(p->name, sizeof(p->name), "%s", prog->name); + + return p; +} + void add_probe_to_tracer(struct probe *pb) { struct bpf_tracer *tracer = pb->tracer; @@ -1185,6 +1234,93 @@ static int lsm_detach(struct lsm_prog *p) return ETR_OK; } +static int optional_kprobe_attach(struct optional_kprobe_prog *p) +{ + if (p->link) { + return ETR_EXIST; + } + + if (p->prog->prog_fd < 0) { + ebpf_warning("skip unloaded optional kprobe program, name:%s.\n", + p->name); + return ETR_INVAL; + } + + if (p->prog->prog_fd == 0) { + p->prog->prog_fd = load_ebpf_prog(p->prog); + if (p->prog->prog_fd < 0) { + ebpf_warning("load optional kprobe program failed, name:%s.\n", + p->name); + return ETR_INVAL; + } + } + + p->link = exec_attach_kprobe(p->prog, p->prog->sec_name, p->isret, -1); + if (p->link == NULL) { + __sync_fetch_and_add(&attach_failed_count, 1); + return ETR_INVAL; + } + + return ETR_OK; +} + +static int optional_kprobe_detach(struct optional_kprobe_prog *p) +{ + if (p->link == NULL) + return ETR_NOTEXIST; + + if (p->link->detach) + p->link->detach(p->link); + + free(p->link); + p->link = NULL; + return ETR_OK; +} + +static int optional_kprobe_programs_handle(struct bpf_tracer *tracer, int type) +{ + int (*kprobe_handle) (struct 
optional_kprobe_prog * p) = NULL; + struct optional_kprobe_prog *kprobe; + struct ebpf_object *obj = tracer->obj; + int i, error; + + if (type == HOOK_ATTACH) + kprobe_handle = optional_kprobe_attach; + else if (type == HOOK_DETACH) + kprobe_handle = optional_kprobe_detach; + else + return ETR_INVAL; + + for (i = 0; i < obj->progs_cnt; i++) { + if (!is_optional_ai_agent_kprobe_prog(&obj->progs[i])) + continue; + + kprobe = get_optional_kprobe_from_tracer(tracer, &obj->progs[i]); + if (!kprobe) + continue; + + error = kprobe_handle(kprobe); + if (type == HOOK_ATTACH && error == ETR_EXIST) + continue; + if (type == HOOK_DETACH && error == ETR_NOTEXIST) + continue; + + if (error) { + ebpf_warning( + "%s optional kprobe: '%s', failed; syscall enforcement disabled for this hook.", + type == HOOK_ATTACH ? "attach" : "detach", + kprobe->prog->sec_name); + continue; + } + + ebpf_info("%s optional kprobe: '%s', succeed!", + type == HOOK_ATTACH ? "attach" : "detach", + kprobe->prog->sec_name); + } + + return ETR_OK; +} + static int lsm_programs_handle(struct bpf_tracer *tracer, int type) { int (*lsm_handle) (struct lsm_prog * p) = NULL; @@ -1347,6 +1483,7 @@ int tracer_hooks_process(struct bpf_tracer *tracer, enum tracer_hook_type type, } lsm_programs: + optional_kprobe_programs_handle(tracer, type); lsm_programs_handle(tracer, type); perf_event: diff --git a/agent/src/ebpf/user/tracer.h b/agent/src/ebpf/user/tracer.h index 098a0f42528..644eaf8dc2b 100644 --- a/agent/src/ebpf/user/tracer.h +++ b/agent/src/ebpf/user/tracer.h @@ -335,6 +335,14 @@ struct lsm_prog { int prog_fd; }; +struct optional_kprobe_prog { + char name[PROBE_NAME_SZ]; + struct ebpf_link *link; + struct ebpf_prog *prog; + int prog_fd; + bool isret; +}; + struct queue { int id; // Queue Identifier struct bpf_tracer *t; @@ -419,6 +427,8 @@ struct bpf_tracer { int kfuncs_count; struct lsm_prog lsms[PROBES_NUM_MAX]; int lsms_count; + struct optional_kprobe_prog optional_kprobes[PROBES_NUM_MAX]; + int 
optional_kprobes_count; pthread_mutex_t mutex_probes_lock; // Protect the probes operation in multiple threads /* diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index db355b476a8..7cb20634f47 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -59,6 +59,8 @@ use zstd::bulk::compress; use crate::common::ebpf::EbpfType; use crate::common::flow::L7Stats; +#[cfg(feature = "enterprise")] +use crate::common::kernel_capability::KernelCapability; use crate::common::l7_protocol_log::{ get_all_protocol, L7ProtocolBitmap, L7ProtocolParserInterface, }; @@ -684,9 +686,13 @@ static mut TIME_DIFF: Option> = None; #[cfg(feature = "enterprise")] static AI_AGENT_EXEC_RULES_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] +static AI_AGENT_SYSCALL_RULES_MAP_FD: AtomicI32 = AtomicI32::new(-1); +#[cfg(feature = "enterprise")] static AI_AGENT_POLICY_EPOCH_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 8; +#[cfg(feature = "enterprise")] +const AI_AGENT_SYSCALL_RULES_BPF_MAX: usize = 32; #[cfg(feature = "enterprise")] fn ai_agent_enforcement_mode_eq(value: &str, expected: &str) -> bool { @@ -709,7 +715,22 @@ fn ai_agent_enforcement_lsm_allowed( } #[cfg(feature = "enterprise")] -fn ai_agent_enforcement_inputs( +fn ai_agent_enforcement_kprobe_override_allowed( + config: &crate::config::config::AiAgentEnforcementConfig, +) -> bool { + let mechanism_allowed = config + .allowed_mechanisms + .iter() + .any(|m| ai_agent_enforcement_mode_eq(m, "kprobe_override")); + let strategy_allows_override = matches!( + config.syscall_strategy.trim().to_ascii_lowercase().as_str(), + "auto" | "override_only" + ); + mechanism_allowed && strategy_allows_override +} + +#[cfg(feature = "enterprise")] +fn ai_agent_exec_enforcement_inputs( config: &crate::config::config::AiAgentEnforcementConfig, mode: enterprise_utils::ai_agent_enforcement::EnforcementMode, ) -> Vec { @@ 
-741,6 +762,51 @@ fn ai_agent_enforcement_inputs( .collect() } +#[cfg(feature = "enterprise")] +fn ai_agent_syscall_enforcement_inputs( + config: &crate::config::config::AiAgentEnforcementConfig, + mode: enterprise_utils::ai_agent_enforcement::EnforcementMode, +) -> Vec { + config + .rules + .iter() + .filter(|rule| { + ai_agent_enforcement_mode_eq(&rule.scope, "ai_agent_tree") + && ai_agent_enforcement_mode_eq(&rule.target_type, "syscall") + }) + .map(|rule| { + let rule_mode = if mode + == enterprise_utils::ai_agent_enforcement::EnforcementMode::Block + && ai_agent_enforcement_mode_eq(&rule.action.action_type, "deny") + { + enterprise_utils::ai_agent_enforcement::EnforcementMode::Block + } else { + enterprise_utils::ai_agent_enforcement::EnforcementMode::AuditOnly + }; + enterprise_utils::ai_agent_enforcement::SyscallRuleInput { + id: rule.id.clone(), + mode: rule_mode, + names: rule.syscall.names.clone(), + symbols: rule.syscall.symbols.clone(), + } + }) + .collect() +} + +#[cfg(feature = "enterprise")] +fn ai_agent_syscall_policy_supported_by_kernel( + policy: &enterprise_utils::ai_agent_enforcement::CompiledSyscallPolicy, + capability: &KernelCapability, +) -> bool { + let records = policy.to_bpf_records(); + !records.is_empty() + && records.iter().all(|record| { + enterprise_utils::ai_agent_enforcement::syscall_override_symbols(record.syscall_key) + .iter() + .any(|symbol| capability.supports_kprobe_override_symbol(symbol)) + }) +} + pub unsafe fn string_from_null_terminated_c_str(ptr: *const u8) -> String { CStr::from_ptr(ptr as *const libc::c_char) .to_string_lossy() @@ -1513,6 +1579,13 @@ impl EbpfCollector { ) }; AI_AGENT_EXEC_RULES_MAP_FD.store(exec_rules_fd, Ordering::Relaxed); + let syscall_rules_fd = unsafe { + ebpf::bpf_table_get_map_fd( + c"socket-trace".as_ptr(), + c"__ai_agent_syscall_rules".as_ptr(), + ) + }; + AI_AGENT_SYSCALL_RULES_MAP_FD.store(syscall_rules_fd, Ordering::Relaxed); let policy_epoch_fd = unsafe { ebpf::bpf_table_get_map_fd( 
c"socket-trace".as_ptr(), @@ -1520,10 +1593,10 @@ impl EbpfCollector { ) }; AI_AGENT_POLICY_EPOCH_MAP_FD.store(policy_epoch_fd, Ordering::Relaxed); - if exec_rules_fd < 0 || policy_epoch_fd < 0 { + if exec_rules_fd < 0 || syscall_rules_fd < 0 || policy_epoch_fd < 0 { warn!( - "AI Agent enforcement: BPF maps unavailable (__ai_agent_exec_rules={}, __ai_agent_policy_epoch={}), block mode will downgrade to audit-only", - exec_rules_fd, policy_epoch_fd + "AI Agent enforcement: BPF maps unavailable (__ai_agent_exec_rules={}, __ai_agent_syscall_rules={}, __ai_agent_policy_epoch={}), block mode will downgrade to audit-only for unavailable mechanisms", + exec_rules_fd, syscall_rules_fd, policy_epoch_fd ); } Self::sync_ai_agent_enforcement_policy(&config.ai_agent_enforcement); @@ -1568,7 +1641,7 @@ impl EbpfCollector { } #[cfg(feature = "enterprise")] - fn clear_ai_agent_enforcement_bpf_maps(max_records: usize) { + fn clear_ai_agent_exec_enforcement_bpf_maps(max_records: usize) { let exec_rules_fd = AI_AGENT_EXEC_RULES_MAP_FD.load(Ordering::Relaxed); let policy_epoch_fd = AI_AGENT_POLICY_EPOCH_MAP_FD.load(Ordering::Relaxed); if exec_rules_fd < 0 || policy_epoch_fd < 0 { @@ -1585,54 +1658,87 @@ impl EbpfCollector { } } + #[cfg(feature = "enterprise")] + fn clear_ai_agent_syscall_enforcement_bpf_maps(max_records: usize) { + let syscall_rules_fd = AI_AGENT_SYSCALL_RULES_MAP_FD.load(Ordering::Relaxed); + let policy_epoch_fd = AI_AGENT_POLICY_EPOCH_MAP_FD.load(Ordering::Relaxed); + if syscall_rules_fd < 0 || policy_epoch_fd < 0 { + return; + } + match enterprise_utils::ai_agent_enforcement::compile_syscall_rules(&[]) { + Ok(policy) => { + if let Err(e) = + policy.sync_to_bpf_maps(syscall_rules_fd, policy_epoch_fd, max_records) + { + warn!( + "AI Agent enforcement: failed to clear syscall BPF maps: {}", + e + ); + } + } + Err(e) => warn!( + "AI Agent enforcement: failed to build empty syscall policy: {}", + e + ), + } + } + #[cfg(feature = "enterprise")] fn 
sync_ai_agent_enforcement_policy(config: &crate::config::config::AiAgentEnforcementConfig) { use enterprise_utils::ai_agent_enforcement::{ - compile_exec_rules, set_global_exec_policy, EnforcementMode, + compile_exec_rules, compile_syscall_rules, set_global_exec_policy, EnforcementMode, }; - let max_records = config.max_rules.min(AI_AGENT_EXEC_RULES_BPF_MAX); + let max_exec_records = config.max_rules.min(AI_AGENT_EXEC_RULES_BPF_MAX); + let max_syscall_records = config.max_rules.min(AI_AGENT_SYSCALL_RULES_BPF_MAX); if !config.enabled { set_global_exec_policy(None); - Self::clear_ai_agent_enforcement_bpf_maps(max_records); + Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); + Self::clear_ai_agent_syscall_enforcement_bpf_maps(max_syscall_records); return; } let exec_rules_fd = AI_AGENT_EXEC_RULES_MAP_FD.load(Ordering::Relaxed); + let syscall_rules_fd = AI_AGENT_SYSCALL_RULES_MAP_FD.load(Ordering::Relaxed); let policy_epoch_fd = AI_AGENT_POLICY_EPOCH_MAP_FD.load(Ordering::Relaxed); - let bpf_maps_available = exec_rules_fd >= 0 && policy_epoch_fd >= 0; + let exec_bpf_maps_available = exec_rules_fd >= 0 && policy_epoch_fd >= 0; + let syscall_bpf_maps_available = syscall_rules_fd >= 0 && policy_epoch_fd >= 0; let lsm_allowed = ai_agent_enforcement_lsm_allowed(config); + let kprobe_override_allowed = ai_agent_enforcement_kprobe_override_allowed(config); let requested_block = ai_agent_enforcement_mode_eq(&config.mode, "block"); - let effective_mode = if requested_block && bpf_maps_available && lsm_allowed { + let exec_effective_mode = if requested_block && exec_bpf_maps_available && lsm_allowed { EnforcementMode::Block } else { if requested_block { warn!( "AI Agent enforcement: block mode requested but BPF LSM is unavailable or disallowed; downgrade to audit-only (maps_available={}, lsm_allowed={})", - bpf_maps_available, lsm_allowed + exec_bpf_maps_available, lsm_allowed ); } EnforcementMode::AuditOnly }; - let inputs = ai_agent_enforcement_inputs(config, 
effective_mode); + let inputs = ai_agent_exec_enforcement_inputs(config, exec_effective_mode); let policy = match compile_exec_rules(&inputs) { Ok(policy) => policy, Err(e) => { warn!("AI Agent enforcement: failed to compile policy: {}", e); set_global_exec_policy(None); - Self::clear_ai_agent_enforcement_bpf_maps(max_records); + Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); return; } }; - if effective_mode == EnforcementMode::Block { - if let Err(e) = policy.sync_to_bpf_maps(exec_rules_fd, policy_epoch_fd, max_records) { + if exec_effective_mode == EnforcementMode::Block { + if let Err(e) = + policy.sync_to_bpf_maps(exec_rules_fd, policy_epoch_fd, max_exec_records) + { warn!( "AI Agent enforcement: failed to sync BPF policy, downgrade to audit-only: {}", e ); - let audit_inputs = ai_agent_enforcement_inputs(config, EnforcementMode::AuditOnly); + let audit_inputs = + ai_agent_exec_enforcement_inputs(config, EnforcementMode::AuditOnly); match compile_exec_rules(&audit_inputs) { Ok(audit_policy) => set_global_exec_policy(Some(audit_policy)), Err(e) => { @@ -1643,14 +1749,83 @@ impl EbpfCollector { set_global_exec_policy(None); } } - Self::clear_ai_agent_enforcement_bpf_maps(max_records); + Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); return; } } else { - Self::clear_ai_agent_enforcement_bpf_maps(max_records); + Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); } set_global_exec_policy(Some(policy)); + + if !syscall_bpf_maps_available || !kprobe_override_allowed { + if requested_block && !kprobe_override_allowed { + warn!( + "AI Agent enforcement: syscall block requested but kprobe_override is disallowed by config; syscall enforcement disabled" + ); + } + Self::clear_ai_agent_syscall_enforcement_bpf_maps(max_syscall_records); + return; + } + + let syscall_inputs = + ai_agent_syscall_enforcement_inputs(config, EnforcementMode::AuditOnly); + if syscall_inputs.is_empty() { + 
Self::clear_ai_agent_syscall_enforcement_bpf_maps(max_syscall_records); + return; + } + + let audit_syscall_policy = match compile_syscall_rules(&syscall_inputs) { + Ok(policy) => policy, + Err(e) => { + warn!( + "AI Agent enforcement: failed to compile syscall policy: {}", + e + ); + Self::clear_ai_agent_syscall_enforcement_bpf_maps(max_syscall_records); + return; + } + }; + + let syscall_effective_mode = if requested_block { + let block_inputs = ai_agent_syscall_enforcement_inputs(config, EnforcementMode::Block); + match compile_syscall_rules(&block_inputs) { + Ok(block_policy) => { + let capability = KernelCapability::detect(); + if ai_agent_syscall_policy_supported_by_kernel(&block_policy, &capability) { + Some(block_policy) + } else { + warn!( + "AI Agent enforcement: syscall block requested but kprobe override allowlist does not cover all configured syscall rules; downgrade to audit-only (capability={:?})", + capability + ); + None + } + } + Err(e) => { + warn!( + "AI Agent enforcement: failed to compile blocking syscall policy: {}", + e + ); + None + } + } + } else { + None + }; + + let syscall_policy = syscall_effective_mode + .as_ref() + .unwrap_or(&audit_syscall_policy); + if let Err(e) = + syscall_policy.sync_to_bpf_maps(syscall_rules_fd, policy_epoch_fd, max_syscall_records) + { + warn!( + "AI Agent enforcement: failed to sync syscall BPF policy: {}", + e + ); + Self::clear_ai_agent_syscall_enforcement_bpf_maps(max_syscall_records); + } } fn ebpf_start() { diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index 5c96e84c8da..9c0d71e825c 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -2362,7 +2362,7 @@ inputs: #### 执行阻断 {#inputs.proc.ai_agent.enforcement} -AI Agent 命令执行阻断。第一版仅支持 exec 命令审计/阻断。 +AI Agent 命令和部分直接 syscall 执行阻断。 ##### 开启执行阻断 {#inputs.proc.ai_agent.enforcement.enabled} @@ -2456,7 +2456,7 @@ inputs: **详细描述**: -exec 命令阻断的机制选择。第一版在可用时使用 BPF LSM;kprobe override 预留给后续 syscall 
阻断,并且仅在能力探测通过时使用。 +exec 命令阻断的机制选择。exec 阻断在可用时使用 BPF LSM。 ##### Syscall 阻断策略 {#inputs.proc.ai_agent.enforcement.syscall_strategy} @@ -2494,7 +2494,7 @@ inputs: **详细描述**: -预留给后续直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 +直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 ##### 允许的阻断机制 {#inputs.proc.ai_agent.enforcement.allowed_mechanisms} @@ -2616,7 +2616,7 @@ inputs: **详细描述**: -AI Agent 命令执行阻断规则。第一版支持 exec 命令 exact/prefix/suffix 匹配。 +AI Agent 执行阻断规则。exec 规则支持 exact/prefix/suffix 匹配;syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 ### 符号表 {#inputs.proc.symbol_table} diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 6634b21c566..8909d111b2c 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -2393,7 +2393,7 @@ Whether to enable AI Agent file IO event collection. #### Enforcement {#inputs.proc.ai_agent.enforcement} -AI Agent command execution enforcement. The first implementation only supports exec command audit/block. +AI Agent command and selected direct syscall execution enforcement. ##### Enabled {#inputs.proc.ai_agent.enforcement.enabled} @@ -2487,7 +2487,7 @@ inputs: **Description**: -Enforcement mechanism selection for exec command blocking. The first implementation uses BPF LSM when available; kprobe override is reserved for future syscall blocking when capability probing succeeds. +Enforcement mechanism selection for exec command blocking. Exec blocking uses BPF LSM when available. ##### Syscall Strategy {#inputs.proc.ai_agent.enforcement.syscall_strategy} @@ -2525,7 +2525,7 @@ inputs: **Description**: -Reserved mechanism selection for future direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. +Mechanism selection for direct syscall blocking. 
kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. ##### Allowed Mechanisms {#inputs.proc.ai_agent.enforcement.allowed_mechanisms} @@ -2647,7 +2647,7 @@ inputs: **Description**: -AI Agent command enforcement rules. The first implementation supports exec command exact/prefix/suffix matching. +AI Agent enforcement rules. Exec rules support exact/prefix/suffix matching; syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. ### Symbol Table {#inputs.proc.symbol_table} diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index 4159656ade8..f1c0b9ee29d 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1729,8 +1729,8 @@ inputs: # en: Enforcement # ch: 执行阻断 # description: - # en: AI Agent command execution enforcement. The first implementation only supports exec command audit/block. - # ch: AI Agent 命令执行阻断。第一版仅支持 exec 命令审计/阻断。 + # en: AI Agent command and selected direct syscall execution enforcement. + # ch: AI Agent 命令和部分直接 syscall 执行阻断。 enforcement: # type: bool # name: @@ -1763,9 +1763,9 @@ inputs: # ee_feature: true # description: # en: |- - # Enforcement mechanism selection for exec command blocking. The first implementation uses BPF LSM when available; kprobe override is reserved for future syscall blocking when capability probing succeeds. + # Enforcement mechanism selection for exec command blocking. Exec blocking uses BPF LSM when available. # ch: |- - # exec 命令阻断的机制选择。第一版在可用时使用 BPF LSM;kprobe override 预留给后续 syscall 阻断,并且仅在能力探测通过时使用。 + # exec 命令阻断的机制选择。exec 阻断在可用时使用 BPF LSM。 strategy: auto # type: string # name: @@ -1778,9 +1778,9 @@ inputs: # ee_feature: true # description: # en: |- - # Reserved mechanism selection for future direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. 
+ # Mechanism selection for direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. # ch: |- - # 预留给后续直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 + # 直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 syscall_strategy: auto # type: string # name: @@ -1828,9 +1828,9 @@ inputs: # ee_feature: true # description: # en: |- - # AI Agent command enforcement rules. The first implementation supports exec command exact/prefix/suffix matching. + # AI Agent enforcement rules. Exec rules support exact/prefix/suffix matching; syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. # ch: |- - # AI Agent 命令执行阻断规则。第一版支持 exec 命令 exact/prefix/suffix 匹配。 + # AI Agent 执行阻断规则。exec 规则支持 exact/prefix/suffix 匹配;syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 rules: [] # type: section # name: From 247ae198424259519263693ecb742a39f83b6cd9 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 13 May 2026 19:50:06 +0800 Subject: [PATCH 14/24] fix(server): gofmt native tag table names --- server/libs/nativetag/nativetag.go | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/server/libs/nativetag/nativetag.go b/server/libs/nativetag/nativetag.go index 71fcf45be6c..d3ee4fae13c 100644 --- a/server/libs/nativetag/nativetag.go +++ b/server/libs/nativetag/nativetag.go @@ -51,34 +51,34 @@ const ( ) var NativeTagDatabaseNames = [MAX_NATIVE_TAG_TABLE]string{ - APPLICATION_LOG: "application_log", - EVENT_EVENT: "event", - EVENT_FILE_EVENT: "event", - EVENT_FILE_AGG_EVENT: "event", - EVENT_FILE_MGMT_EVENT: "event", - EVENT_PROC_PERM_EVENT: "event", - EVENT_PROC_OPS_EVENT: "event", - L7_FLOW_LOG: "flow_log", - DEEPFLOW_ADMIN: "deepflow_admin", - DEEPFLOW_TENANT: 
"deepflow_tenant", - EXT_METRICS: "ext_metrics", - PROFILE: "profile", + APPLICATION_LOG: "application_log", + EVENT_EVENT: "event", + EVENT_FILE_EVENT: "event", + EVENT_FILE_AGG_EVENT: "event", + EVENT_FILE_MGMT_EVENT: "event", + EVENT_PROC_PERM_EVENT: "event", + EVENT_PROC_OPS_EVENT: "event", + L7_FLOW_LOG: "flow_log", + DEEPFLOW_ADMIN: "deepflow_admin", + DEEPFLOW_TENANT: "deepflow_tenant", + EXT_METRICS: "ext_metrics", + PROFILE: "profile", EVENT_PROC_BLOCK_EVENT: "event", } var NativeTagTableNames = [MAX_NATIVE_TAG_TABLE]string{ - APPLICATION_LOG: "log", - EVENT_EVENT: "event", - EVENT_FILE_EVENT: "file_event", - EVENT_FILE_AGG_EVENT: "file_agg_event", - EVENT_FILE_MGMT_EVENT: "file_mgmt_event", - EVENT_PROC_PERM_EVENT: "proc_perm_event", - EVENT_PROC_OPS_EVENT: "proc_ops_event", - L7_FLOW_LOG: "l7_flow_log", - DEEPFLOW_ADMIN: "deepflow_server", - DEEPFLOW_TENANT: "deepflow_collector", - EXT_METRICS: "metrics", - PROFILE: "in_process", + APPLICATION_LOG: "log", + EVENT_EVENT: "event", + EVENT_FILE_EVENT: "file_event", + EVENT_FILE_AGG_EVENT: "file_agg_event", + EVENT_FILE_MGMT_EVENT: "file_mgmt_event", + EVENT_PROC_PERM_EVENT: "proc_perm_event", + EVENT_PROC_OPS_EVENT: "proc_ops_event", + L7_FLOW_LOG: "l7_flow_log", + DEEPFLOW_ADMIN: "deepflow_server", + DEEPFLOW_TENANT: "deepflow_collector", + EXT_METRICS: "metrics", + PROFILE: "in_process", EVENT_PROC_BLOCK_EVENT: "proc_block_event", } From c67f8909d5fc4351caf7dc9711ca3f7c97c5b449 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 14 May 2026 14:34:14 +0800 Subject: [PATCH 15/24] feat(agent): support ai agent argv enforcement --- agent/crates/enterprise-utils/src/lib.rs | 15 ++ agent/src/config/config.rs | 19 ++ .../test/test_ai_agent_source_contracts.py | 163 ++++++++++++++++-- agent/src/ebpf/user/extended/extended.c | 11 ++ agent/src/ebpf/user/extended/extended.h | 2 + agent/src/ebpf/user/load.c | 12 +- agent/src/ebpf/user/socket.c | 6 +- agent/src/ebpf/user/tracer.c | 3 +- agent/src/ebpf_dispatcher.rs | 
32 +++- server/agent_config/README-CH.md | 8 +- server/agent_config/README.md | 8 +- server/agent_config/template.yaml | 4 +- 12 files changed, 258 insertions(+), 25 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 4acef7b21a2..e1b9b4bd318 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -517,9 +517,24 @@ pub mod ai_agent_enforcement { pub exact: Vec, pub prefix: Vec, pub suffix: Vec, + pub argv_matches: Vec, pub argv_contains_any: Vec, } + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub enum ExecArgvMatchOp { + Exact, + Prefix, + Suffix, + } + + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct ExecArgvMatchInput { + pub index: u8, + pub op: ExecArgvMatchOp, + pub value: String, + } + #[derive(Clone, Debug, PartialEq, Eq)] pub struct SyscallRuleInput { pub id: String, diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 351426e9872..edc56ae213e 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -709,9 +709,28 @@ pub struct AiAgentExecMatch { pub exact: Vec, pub prefix: Vec, pub suffix: Vec, + pub argv_matches: Vec, pub argv_contains_any: Vec, } +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentExecArgvMatch { + pub index: u8, + pub op: String, + pub value: String, +} + +impl Default for AiAgentExecArgvMatch { + fn default() -> Self { + Self { + index: 0, + op: "exact".to_string(), + value: String::new(), + } + } +} + #[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] #[serde(default)] pub struct AiAgentSyscallMatch { diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index ebaaa3d0244..1a63d81af10 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from pathlib 
import Path +import re import sys @@ -16,6 +17,7 @@ ENTERPRISE_AGENT = WORKSPACE_ROOT / "deepflow-core" / "agent" ENTERPRISE_BPF = ENTERPRISE_AGENT / "src" / "ebpf" / "user" / "extended" / "bpf" ENTERPRISE_SUPPORT = ENTERPRISE_AGENT / "scripts" / "support_extended_observability" +ENTERPRISE_FEATURE_TOP = ENTERPRISE_AGENT / "src" / "ebpf" / "user" / "extended" / "feature.top.mk" def require(condition: bool, message: str) -> None: @@ -53,6 +55,13 @@ def read_source(path: Path) -> str: in socket_c_text, "ai_agent limit log must use unsigned format specifiers", ) +require( + 'tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_execve");' + not in socket_c_text + and 'tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_execveat");' + not in socket_c_text, + "AI Agent exec argv enforcement must not attach sys_enter_execve tracepoints in the large socket-trace program", +) data_submit_start = socket_trace_text.find("__data_submit(") require(data_submit_start != -1, "missing __data_submit definition") @@ -180,12 +189,19 @@ def read_source(path: Path) -> str: if ENTERPRISE_AGENT.exists(): exec_enforce_bpf = ENTERPRISE_BPF / "ai_agent_exec_enforce.bpf.c" + exec_common_bpf = ENTERPRISE_BPF / "ai_agent_exec_common.bpf.h" require( exec_enforce_bpf.exists(), f"missing enterprise AI Agent exec enforcement BPF: {exec_enforce_bpf}", ) + require( + exec_common_bpf.exists(), + f"missing enterprise AI Agent exec common BPF header: {exec_common_bpf}", + ) exec_enforce_text = read_source(exec_enforce_bpf) + exec_common_text = read_source(exec_common_bpf) support_text = read_source(ENTERPRISE_SUPPORT) + feature_top_text = read_source(ENTERPRISE_FEATURE_TOP) require( 'SEC("lsm/bprm_check_security")' in exec_enforce_text, @@ -201,34 +217,157 @@ def read_source(path: Path) -> str: "AI Agent exec enforcement must scope matching to AI Agent processes", ) require( - "DATA_SOURCE_PROC_BLOCK_EVENT" in exec_enforce_text, + "DATA_SOURCE_PROC_BLOCK_EVENT" in exec_common_text, "AI Agent exec 
enforcement must emit proc block events", ) require( - "#define AI_AGENT_EXEC_MAX_RULES 8" in exec_enforce_text, + re.search(r"#define\s+AI_AGENT_EXEC_MAX_RULES\s+8", exec_common_text), "AI Agent exec enforcement must cap BPF-side rule scan to 8 records to stay under old verifier complexity limits", ) require( - "ai_agent_match_contains" not in exec_enforce_text - and "AI_AGENT_EXEC_MATCH_ARGV_CONTAINS" not in exec_enforce_text, - "AI Agent exec enforcement BPF must not include argv_contains nested scans on old verifier kernels", + "args ? args->cmdline : exec_path" not in exec_enforce_text, + "AI Agent exec LSM hook must not use a ternary map_value_or_null cmdline pointer on old verifiers", + ) + require( + "MAP_PERARRAY(ai_agent_exec_path_buf" in exec_enforce_text + and "struct ai_agent_exec_path *path_buf" in exec_enforce_text + and "path_buf->path" in exec_enforce_text + and "char exec_path[AI_AGENT_EXEC_PATTERN_LEN]" not in exec_enforce_text, + "AI Agent exec LSM hook must keep exec_path in a scratch map, not a large stack array with variable-index reads", + ) + require( + "TP_SYSCALL_PROG(enter_execve)" not in exec_enforce_text + and "TP_SYSCALL_PROG(enter_execveat)" not in exec_enforce_text + and "ai_agent_capture_exec_args" not in exec_enforce_text + and "argv_match_bits" not in exec_enforce_text, + "AI Agent exec LSM hook must not depend on sys_enter_execve argv capture", + ) + require( + "AI_AGENT_EXEC_MATCH_SUFFIX" in exec_common_text + and "suffix_hash" in exec_common_text + and "ai_agent_exec_collect_path_facts" in exec_common_text, + "AI Agent exec enforcement BPF must support suffix path matching with precomputed hashes", ) require( - "pattern_hash" in exec_enforce_text - and "ai_agent_hash_exec_path" in exec_enforce_text - and "ai_agent_match_exact" not in exec_enforce_text - and "ai_agent_match_prefix" not in exec_enforce_text - and "ai_agent_match_suffix" not in exec_enforce_text, - "AI Agent exec enforcement BPF must use precomputed exact path 
hashes instead of verifier-expensive string scans", + "ai_agent_exec_starts_with" not in exec_enforce_text + and "ai_agent_exec_ends_with" not in exec_enforce_text + and "exec_path[exec_idx]" not in exec_enforce_text, + "AI Agent exec LSM hook must not use dynamic-offset string comparisons on old verifiers", ) require( - "ai_agent_submit_event" in exec_enforce_text, + "ai_agent_exec_argv_hashes" not in exec_enforce_text + and "ai_agent_update_argv_match_bits" not in exec_enforce_text + and "ai_agent_cmdline_contains" not in exec_enforce_text, + "AI Agent exec LSM enforcement must stay path-only; argv matching belongs in small kprobe override programs", + ) + lsm_body = exec_enforce_text[ + exec_enforce_text.find('SEC("lsm/bprm_check_security")') : + ] + require( + "rule->argv_pattern_len != 0" in exec_enforce_text + and "argv_pattern" not in lsm_body + and "argv_pattern_hash" not in lsm_body, + "AI Agent exec LSM hook must ignore argv-qualified rules to avoid blocking path-only false positives", + ) + require( + "pattern_hash" in exec_common_text + and "ai_agent_hash_exec_path" in exec_common_text, + "AI Agent exec enforcement BPF must keep exact path hashing for low-cost exact matches", + ) + require( + "ai_agent_submit_event" in exec_common_text, "AI Agent exec enforcement must submit events through the AI Agent pipeline", ) + require( + "cmdline_src_sz" in exec_common_text + and "cmdline, cmdline_src_sz" in exec_common_text + and "path_buf->path,\n\t\t\t\t AI_AGENT_EXEC_PATTERN_LEN" in exec_enforce_text, + "AI Agent exec block event must copy cmdline using the actual source buffer size", + ) require( "ai_agent_exec_enforce.bpf.c" in support_text, "support_extended_observability must include ai_agent_exec_enforce.bpf.c", ) + exec_override_bpf = ENTERPRISE_BPF / "ai_agent_exec_override.bpf.c" + exec_override_standalone_bpf = ENTERPRISE_BPF / "ai_agent_exec_override_standalone.bpf.c" + require( + exec_override_bpf.exists(), + f"missing enterprise AI Agent exec 
override BPF: {exec_override_bpf}", + ) + require( + exec_override_standalone_bpf.exists(), + f"missing enterprise standalone AI Agent exec override BPF wrapper: {exec_override_standalone_bpf}", + ) + exec_override_text = read_source(exec_override_bpf) + exec_override_standalone_text = read_source(exec_override_standalone_bpf) + exec_bpf_text = "\n".join((exec_enforce_text, exec_common_text, exec_override_text)) + for forbidden in ( + "argv_contains_any", + "AI_AGENT_EXEC_MATCH_ARGV_CONTAINS", + "ARGV_CONTAINS", + "cmdline_regex", + "ai_agent_cmdline_contains", + "cmdline_contains", + ): + require( + forbidden not in exec_bpf_text, + f"AI Agent exec strong-block BPF must not contain legacy argv/cmdline selector '{forbidden}'", + ) + require( + 'SEC("kprobe/__x64_sys_execve")' in exec_override_text + and 'SEC("kprobe/__x64_sys_execveat")' in exec_override_text + and "bpf_override_return(ctx," in exec_override_text, + "AI Agent argv-qualified exec enforcement must use small kprobe override programs", + ) + require( + re.search(r"#define\s+AI_AGENT_EXEC_OVERRIDE_ARG_LEN\s+64", exec_override_text) + and "ai_agent_exec_override_read_argv_index" in exec_override_text + and "rule->argv_index" in exec_override_text + and "rule->argv_op != AI_AGENT_EXEC_ARGV_OP_EXACT" in exec_override_text + and "ai_agent_exec_override_arg_matches" in exec_override_text, + "AI Agent exec override must read only the configured argv index", + ) + require( + "ai_agent_exec_override_read_syscall_arg" in exec_override_text + and "const char *filename = (const char *)PT_REGS_PARM1(ctx);" not in exec_override_text + and "const char *const *argv = (const char *const *)PT_REGS_PARM2(ctx);" not in exec_override_text + and "const char *filename = (const char *)PT_REGS_PARM2(ctx);" not in exec_override_text + and "const char *const *argv = (const char *const *)PT_REGS_PARM3(ctx);" not in exec_override_text, + "AI Agent exec override must decode syscall-wrapper pt_regs before reading execve 
filename/argv", + ) + require( + "rule->argv_pattern_len == buf->arg_len" in exec_override_text + and "buf->arg.words[0] == rule->argv_pattern_words[0]" in exec_override_text + and "buf->arg.words[7] == rule->argv_pattern_words[7]" in exec_override_text + and "rule->argv_pattern," not in exec_override_text, + "AI Agent exec override must compare argv by fixed len+word chunks, not by scanning policy argv_pattern from map values", + ) + require( + "df_K_ai_agent_exec_override_" in tracer_c_text + and "df_K_ai_agent_exec_override_" in load_text, + "tracer/load must treat AI Agent exec override kprobes as optional kprobe override programs", + ) + require( + "ai_agent_exec_override.bpf.c" not in support_text + or 'socket_trace_bpf_path" "#include "../user/extended/bpf/ai_agent_exec_override.bpf.c"' not in support_text, + "support_extended_observability must not include argv exec override into socket_trace.bpf.c", + ) + require( + "AI_AGENT_EXEC_OVERRIDE_ELFS" in feature_top_text + and "ai_agent_exec_override_standalone.bpf.c" in feature_top_text, + "enterprise Makefile extension must build argv exec override as a standalone BPF object", + ) + require( + "MAP_PERF_EVENT(socket_data" in exec_override_standalone_text + and "MAP_HASH(ai_agent_pids" in exec_override_standalone_text + and "ai_agent_submit_event" in exec_override_standalone_text + and '#include "ai_agent_exec_override.bpf.c"' in exec_override_standalone_text, + "standalone exec override wrapper must define shared map symbols and include only exec override kprobes", + ) + require( + "buf->arg.bytes,\n\t\t\t\t AI_AGENT_EXEC_OVERRIDE_ARG_LEN" in exec_override_text, + "AI Agent exec override must emit argv cmdline with the 64-byte argv buffer size, not the 256-byte path size", + ) syscall_override_bpf = ENTERPRISE_BPF / "ai_agent_syscall_override.bpf.c" require( syscall_override_bpf.exists(), diff --git a/agent/src/ebpf/user/extended/extended.c b/agent/src/ebpf/user/extended/extended.c index 
870fd1dcfff..483bdfbdd60 100755 --- a/agent/src/ebpf/user/extended/extended.c +++ b/agent/src/ebpf/user/extended/extended.c @@ -41,6 +41,17 @@ void __attribute__ ((weak)) extended_prog_jump_tables(struct bpf_tracer *tracer) { } +int __attribute__ ((weak)) extended_socket_tracer_ready(struct bpf_tracer *tracer) +{ + return 0; +} + +int __attribute__ ((weak)) extended_map_reuse_fd(const char *obj_name, + const char *map_name) +{ + return -1; +} + int __attribute__ ((weak)) collect_extended_uprobe_syms_from_procfs(struct tracer_probes_conf *conf) diff --git a/agent/src/ebpf/user/extended/extended.h b/agent/src/ebpf/user/extended/extended.h index 4464caea5ec..c8a0b092adf 100644 --- a/agent/src/ebpf/user/extended/extended.h +++ b/agent/src/ebpf/user/extended/extended.h @@ -37,6 +37,8 @@ int extended_reader_create(struct bpf_tracer *tracer); int extended_maps_set(struct bpf_tracer *tracer); void extended_prog_jump_tables(struct bpf_tracer *tracer); +int extended_socket_tracer_ready(struct bpf_tracer *tracer); +int extended_map_reuse_fd(const char *obj_name, const char *map_name); /** * @brief **collect_extended_uprobe_syms_from_procfs()** extend the handling of uprobe diff --git a/agent/src/ebpf/user/load.c b/agent/src/ebpf/user/load.c index fb8f56e4ace..4d97d89f1f4 100644 --- a/agent/src/ebpf/user/load.c +++ b/agent/src/ebpf/user/load.c @@ -46,6 +46,7 @@ #include "ssl_tracer.h" #include "profile/perf_profiler.h" #include "unwind_tracer.h" +#include "extended/extended.h" #ifndef BPF_PROG_TYPE_LSM #define BPF_PROG_TYPE_LSM 29 @@ -669,7 +670,8 @@ static bool is_optional_ai_agent_kprobe_override_prog(struct ebpf_prog *prog) { return prog != NULL && prog->type == BPF_PROG_TYPE_KPROBE && prog->name != NULL && - strstr(prog->name, "df_K_ai_agent_syscall_override_") == prog->name; + (strstr(prog->name, "df_K_ai_agent_syscall_override_") == prog->name || + strstr(prog->name, "df_K_ai_agent_exec_override_") == prog->name); } static int load_obj__progs(struct ebpf_object *obj) @@ 
-1406,6 +1408,14 @@ int ebpf_obj_load(struct ebpf_object *obj) extended_map_preprocess(map); + int reused_fd = extended_map_reuse_fd(obj->name, map->name); + if (reused_fd >= 0) { + map->fd = reused_fd; + ebpf_info("reuse map fd:%d for obj:%s map:%s\n", + map->fd, obj->name, map->name); + continue; + } + map->fd = bcc_create_map(map->def.type, map->name, map->def.key_size, map->def.value_size, map->def.max_entries, diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index b7010129b96..eeff69ab59c 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -312,8 +312,7 @@ static inline void config_probes_for_ai_agent(struct tracer_probes_conf *tps) tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setgid"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setreuid"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setregid"); - /* fork propagation is AI-agent-specific; exec/exit are already covered by - * config_probes_for_proc_event(). */ + /* fork propagation is AI-agent-specific. */ tps_set_symbol(tps, "tracepoint/sched/sched_process_fork"); } #else @@ -3233,6 +3232,9 @@ int running_socket_tracer(tracer_callback_t handle, tracer->data_limit_max = socket_data_limit_max; + if (extended_socket_tracer_ready(tracer)) + return -EINVAL; + // Insert prog of output data into map for using BPF Tail Calls. 
insert_output_prog_to_map(tracer); diff --git a/agent/src/ebpf/user/tracer.c b/agent/src/ebpf/user/tracer.c index b5185530dda..9adbfcefc15 100644 --- a/agent/src/ebpf/user/tracer.c +++ b/agent/src/ebpf/user/tracer.c @@ -717,7 +717,8 @@ static bool is_optional_ai_agent_kprobe_prog(struct ebpf_prog *prog) { return prog != NULL && prog->type == BPF_PROG_TYPE_KPROBE && prog->name != NULL && - strstr(prog->name, "df_K_ai_agent_syscall_override_") == prog->name && + (strstr(prog->name, "df_K_ai_agent_syscall_override_") == prog->name || + strstr(prog->name, "df_K_ai_agent_exec_override_") == prog->name) && prog->sec_name != NULL && !strncmp(prog->sec_name, "kprobe/", 7); } diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 7cb20634f47..55a1898e2ab 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -729,6 +729,17 @@ fn ai_agent_enforcement_kprobe_override_allowed( mechanism_allowed && strategy_allows_override } +#[cfg(feature = "enterprise")] +fn ai_agent_exec_argv_match_op( + op: &str, +) -> enterprise_utils::ai_agent_enforcement::ExecArgvMatchOp { + match op.trim().to_ascii_lowercase().as_str() { + "prefix" => enterprise_utils::ai_agent_enforcement::ExecArgvMatchOp::Prefix, + "suffix" => enterprise_utils::ai_agent_enforcement::ExecArgvMatchOp::Suffix, + _ => enterprise_utils::ai_agent_enforcement::ExecArgvMatchOp::Exact, + } +} + #[cfg(feature = "enterprise")] fn ai_agent_exec_enforcement_inputs( config: &crate::config::config::AiAgentEnforcementConfig, @@ -756,6 +767,18 @@ fn ai_agent_exec_enforcement_inputs( exact: rule.exec.exact.clone(), prefix: rule.exec.prefix.clone(), suffix: rule.exec.suffix.clone(), + argv_matches: rule + .exec + .argv_matches + .iter() + .map( + |m| enterprise_utils::ai_agent_enforcement::ExecArgvMatchInput { + index: m.index, + op: ai_agent_exec_argv_match_op(&m.op), + value: m.value.clone(), + }, + ) + .collect(), argv_contains_any: rule.exec.argv_contains_any.clone(), } }) @@ -1706,13 
+1729,16 @@ impl EbpfCollector { let lsm_allowed = ai_agent_enforcement_lsm_allowed(config); let kprobe_override_allowed = ai_agent_enforcement_kprobe_override_allowed(config); let requested_block = ai_agent_enforcement_mode_eq(&config.mode, "block"); - let exec_effective_mode = if requested_block && exec_bpf_maps_available && lsm_allowed { + let exec_effective_mode = if requested_block + && exec_bpf_maps_available + && (lsm_allowed || kprobe_override_allowed) + { EnforcementMode::Block } else { if requested_block { warn!( - "AI Agent enforcement: block mode requested but BPF LSM is unavailable or disallowed; downgrade to audit-only (maps_available={}, lsm_allowed={})", - exec_bpf_maps_available, lsm_allowed + "AI Agent enforcement: block mode requested but no exec blocking mechanism is available or allowed; downgrade to audit-only (maps_available={}, lsm_allowed={}, kprobe_override_allowed={})", + exec_bpf_maps_available, lsm_allowed, kprobe_override_allowed ); } EnforcementMode::AuditOnly diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index 9c0d71e825c..648ae14c5ea 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -2513,7 +2513,11 @@ inputs: proc: ai_agent: enforcement: - allowed_mechanisms: [lsm, kprobe_override, sigkill, seccomp] + allowed_mechanisms: + - lsm + - kprobe_override + - sigkill + - seccomp ``` **枚举可选值**: @@ -2616,7 +2620,7 @@ inputs: **详细描述**: -AI Agent 执行阻断规则。exec 规则支持 exact/prefix/suffix 匹配;syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 +AI Agent 执行阻断规则。强阻断 exec 规则支持 path exact/suffix 选择器,并可通过 argv_matches 指定固定 argv index 和 exact value。argv_contains_any 仅作为审计兼容字段,不支持强阻断。syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 ### 符号表 {#inputs.proc.symbol_table} diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 8909d111b2c..45f2a48783a 100644 --- 
a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -2544,7 +2544,11 @@ inputs: proc: ai_agent: enforcement: - allowed_mechanisms: [lsm, kprobe_override, sigkill, seccomp] + allowed_mechanisms: + - lsm + - kprobe_override + - sigkill + - seccomp ``` **Enum options**: @@ -2647,7 +2651,7 @@ inputs: **Description**: -AI Agent enforcement rules. Exec rules support exact/prefix/suffix matching; syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. +AI Agent enforcement rules. Strong-block exec rules support path exact/suffix selectors and optional argv_matches selectors with fixed argv index and exact value. argv_contains_any is audit-only compatibility and is not accepted for strong block. Syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. ### Symbol Table {#inputs.proc.symbol_table} diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index f1c0b9ee29d..cc501fbbbd8 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1828,9 +1828,9 @@ inputs: # ee_feature: true # description: # en: |- - # AI Agent enforcement rules. Exec rules support exact/prefix/suffix matching; syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. + # AI Agent enforcement rules. Strong-block exec rules support path exact/suffix selectors and optional argv_matches selectors with fixed argv index and exact value. argv_contains_any is audit-only compatibility and is not accepted for strong block. Syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. 
# ch: |- - # AI Agent 执行阻断规则。exec 规则支持 exact/prefix/suffix 匹配;syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 + # AI Agent 执行阻断规则。强阻断 exec 规则支持 path exact/suffix 选择器,并可通过 argv_matches 指定固定 argv index 和 exact value。argv_contains_any 仅作为审计兼容字段,不支持强阻断。syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 rules: [] # type: section # name: From e12e06a77e4dffc6f038e9197503f0a793ee1f34 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 18 May 2026 13:45:13 +0800 Subject: [PATCH 16/24] fix(server): add proc block event upgrade issue --- .../db/metadb/migrator/schema/const.go | 2 +- .../schema/rawsql/mysql/issu/7.1.0.40.sql | 1 - .../schema/rawsql/mysql/issu/7.1.0.41.sql | 38 +++++++++++++++++++ .../migrator/schema/schema_regression_test.go | 25 ++++++++++++ 4 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.41.sql diff --git a/server/controller/db/metadb/migrator/schema/const.go b/server/controller/db/metadb/migrator/schema/const.go index 26a1c49e4db..f2ac3fa6178 100644 --- a/server/controller/db/metadb/migrator/schema/const.go +++ b/server/controller/db/metadb/migrator/schema/const.go @@ -20,5 +20,5 @@ const ( RAW_SQL_ROOT_DIR = "/etc/metadb/schema/rawsql" DB_VERSION_TABLE = "db_version" - DB_VERSION_EXPECTED = "7.1.0.40" + DB_VERSION_EXPECTED = "7.1.0.41" ) diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql index b4a677710a8..f29bff0fe73 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.40.sql @@ -64,7 +64,6 @@ CALL InsertDataSourceIfNotExists('事件-文件读写聚合事件', 'event.file_ CALL InsertDataSourceIfNotExists('事件-文件管理事件', 'event.file_mgmt_event', 0, 7*24); CALL 
InsertDataSourceIfNotExists('事件-进程权限事件', 'event.proc_perm_event', 0, 7*24); CALL InsertDataSourceIfNotExists('事件-进程操作事件', 'event.proc_ops_event', 0, 7*24); -CALL InsertDataSourceIfNotExists('事件-进程阻断事件', 'event.proc_block_event', 0, 7*24); DROP PROCEDURE AddColumnIfNotExists; DROP PROCEDURE InsertDataSourceIfNotExists; diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.41.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.41.sql new file mode 100644 index 00000000000..535708f6294 --- /dev/null +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.41.sql @@ -0,0 +1,38 @@ +DROP PROCEDURE IF EXISTS InsertDataSourceIfNotExists; + +CREATE PROCEDURE InsertDataSourceIfNotExists( + IN p_display_name VARCHAR(64), + IN p_data_table_collection VARCHAR(64), + IN p_interval_time INTEGER, + IN p_retention_time INTEGER +) +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM data_source + WHERE data_table_collection = p_data_table_collection + ) THEN + INSERT INTO data_source ( + display_name, + data_table_collection, + base_data_source_id, + interval_time, + retention_time, + lcuuid + ) + VALUES ( + p_display_name, + p_data_table_collection, + 0, + p_interval_time, + p_retention_time, + UUID() + ); + END IF; +END; + +CALL InsertDataSourceIfNotExists('事件-进程阻断事件', 'event.proc_block_event', 0, 7*24); + +DROP PROCEDURE InsertDataSourceIfNotExists; + +UPDATE db_version SET version='7.1.0.41'; diff --git a/server/controller/db/metadb/migrator/schema/schema_regression_test.go b/server/controller/db/metadb/migrator/schema/schema_regression_test.go index 10f30f932aa..1d57161ad4a 100644 --- a/server/controller/db/metadb/migrator/schema/schema_regression_test.go +++ b/server/controller/db/metadb/migrator/schema/schema_regression_test.go @@ -56,6 +56,31 @@ func TestMySQLDMLInsert_AIAgentEventDataSourcesShareDefaultRetention(t *testing. 
} } +func TestProcBlockEventDataSourceHasPostMainMySQLIssue(t *testing.T) { + const mainBaselineVersion = "7.1.0.40" + + if !versionGreater(DB_VERSION_EXPECTED, mainBaselineVersion) { + t.Fatalf("DB_VERSION_EXPECTED=%s, want version greater than main baseline %s", DB_VERSION_EXPECTED, mainBaselineVersion) + } + + sqlPath := filepath.Join("rawsql", "mysql", "issu", DB_VERSION_EXPECTED+".sql") + content, err := os.ReadFile(sqlPath) + if err != nil { + t.Fatalf("read latest mysql issu sql failed: %v", err) + } + sql := string(content) + + required := []string{ + "CALL InsertDataSourceIfNotExists('事件-进程阻断事件', 'event.proc_block_event', 0, 7*24);", + "UPDATE db_version SET version='" + DB_VERSION_EXPECTED + "';", + } + for _, item := range required { + if !strings.Contains(sql, item) { + t.Fatalf("missing proc block event mysql issu entry: %s", item) + } + } +} + func versionGreater(left, right string) bool { leftParts := strings.Split(left, ".") rightParts := strings.Split(right, ".") From d1465afd9f14567285fff7e1f99ff4ee7adef782 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 18 May 2026 19:39:56 +0800 Subject: [PATCH 17/24] feat(agent): raise ai agent exec rule cap to 256 --- .../ebpf/test/test_ai_agent_reboot_syscall.py | 69 +++++++++++++++++++ .../test/test_ai_agent_source_contracts.py | 4 +- agent/src/ebpf_dispatcher.rs | 2 +- 3 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 agent/src/ebpf/test/test_ai_agent_reboot_syscall.py diff --git a/agent/src/ebpf/test/test_ai_agent_reboot_syscall.py b/agent/src/ebpf/test/test_ai_agent_reboot_syscall.py new file mode 100644 index 00000000000..50ef5195e1f --- /dev/null +++ b/agent/src/ebpf/test/test_ai_agent_reboot_syscall.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import argparse +import ctypes +import json +import os +import socket +import sys +import time +from typing import Tuple + + +DEFAULT_HOST = "10.50.120.81" +DEFAULT_PORT = 18081 +DEFAULT_PATH = "/v1/chat/completions" +DEFAULT_SYSCALL_ID = 
169 # x86_64 SYS_reboot + + +def send_ai_request(host: str, port: int, path: str, timeout: float) -> Tuple[str, bytes]: + body = json.dumps({"model": "x", "messages": []}, separators=(",", ":")).encode() + request = ( + f"POST {path} HTTP/1.1\r\n" + f"Host: {host}:{port}\r\n" + "Content-Type: application/json\r\n" + f"Content-Length: {len(body)}\r\n" + "Connection: close\r\n" + "\r\n" + ).encode() + body + + with socket.create_connection((host, port), timeout=timeout) as conn: + conn.sendall(request) + response = conn.recv(4096) + + status_line = response.splitlines()[0].decode(errors="replace") if response else "" + return status_line, response + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Trigger AI-agent scope, then verify direct reboot syscall is blocked with EPERM." + ) + parser.add_argument("--host", default=DEFAULT_HOST) + parser.add_argument("--port", type=int, default=DEFAULT_PORT) + parser.add_argument("--path", default=DEFAULT_PATH) + parser.add_argument("--timeout", type=float, default=3.0) + parser.add_argument("--tries", type=int, default=30) + parser.add_argument("--interval", type=float, default=1.0) + parser.add_argument("--syscall-id", type=int, default=DEFAULT_SYSCALL_ID) + args = parser.parse_args() + + status_line, _ = send_ai_request(args.host, args.port, args.path, args.timeout) + print(status_line, flush=True) + print(f"AI_HTTP_SENT pid={os.getpid()}", flush=True) + + libc = ctypes.CDLL(None, use_errno=True) + for attempt in range(1, args.tries + 1): + time.sleep(args.interval) + ctypes.set_errno(0) + rc = libc.syscall(args.syscall_id, 0, 0, 0, 0) + err = ctypes.get_errno() + print(f"TRY {attempt}: rc={rc} errno={err}", flush=True) + if err == 1: + return 0 + + print("REBOOT_SYSCALL_NOT_BLOCKED", flush=True) + return 2 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 
1a63d81af10..dc326cd6d1e 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -221,8 +221,8 @@ def read_source(path: Path) -> str: "AI Agent exec enforcement must emit proc block events", ) require( - re.search(r"#define\s+AI_AGENT_EXEC_MAX_RULES\s+8", exec_common_text), - "AI Agent exec enforcement must cap BPF-side rule scan to 8 records to stay under old verifier complexity limits", + re.search(r"#define\s+AI_AGENT_EXEC_MAX_RULES\s+256", exec_common_text), + "AI Agent exec enforcement must expose a 256-record BPF-side exec rule cap", ) require( "args ? args->cmdline : exec_path" not in exec_enforce_text, diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 55a1898e2ab..d7a85a48456 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -690,7 +690,7 @@ static AI_AGENT_SYSCALL_RULES_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] static AI_AGENT_POLICY_EPOCH_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] -const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 8; +const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 256; #[cfg(feature = "enterprise")] const AI_AGENT_SYSCALL_RULES_BPF_MAX: usize = 32; From ac15b71c4697f35b5fe7c7f48f98c65b5f09d863 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 19 May 2026 19:04:25 +0800 Subject: [PATCH 18/24] docs(agent): clarify ai agent enforcement config --- agent/src/config/config.rs | 7 +- server/agent_config/README-CH.md | 240 ++++++++++++++++++++++++++---- server/agent_config/README.md | 236 ++++++++++++++++++++++++++--- server/agent_config/template.yaml | 98 +++++++++--- 4 files changed, 509 insertions(+), 72 deletions(-) diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index edc56ae213e..743506789f8 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -645,12 +645,7 @@ impl Default for AiAgentEnforcementConfig { mode: 
"audit_only".to_string(), strategy: "auto".to_string(), syscall_strategy: "auto".to_string(), - allowed_mechanisms: vec![ - "lsm".to_string(), - "kprobe_override".to_string(), - "sigkill".to_string(), - "seccomp".to_string(), - ], + allowed_mechanisms: vec!["lsm".to_string(), "kprobe_override".to_string()], default_fallback: "sigkill".to_string(), max_rules: 256, rules: Vec::new(), diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index 648ae14c5ea..0a6b3adf040 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -2360,11 +2360,11 @@ inputs: 是否开启 AI Agent 文件 IO 事件采集。 -#### 执行阻断 {#inputs.proc.ai_agent.enforcement} +#### 执行治理 {#inputs.proc.ai_agent.enforcement} -AI Agent 命令和部分直接 syscall 执行阻断。 +AI Agent 命令和部分直接 syscall 的审计/阻断。 -##### 开启执行阻断 {#inputs.proc.ai_agent.enforcement.enabled} +##### 开启执行治理 {#inputs.proc.ai_agent.enforcement.enabled} **标签**: @@ -2389,6 +2389,13 @@ inputs: | ---- | ---------------------------- | | Type | bool | +**详细描述**: + +开启 AI Agent enforcement 链路。 + +- `mode: audit_only` 时,只输出审计事件,不真正阻止执行。 +- `mode: block` 时,命中的 `deny` 规则可能真正阻止执行。 + ##### 模式 {#inputs.proc.ai_agent.enforcement.mode} **标签**: @@ -2420,6 +2427,20 @@ inputs: | ---- | ---------------------------- | | Type | string | +**详细描述**: + +全局 enforcement 模式。 + +- `audit_only`:所有命中规则都只输出 `proc_block_event`,`guarantee=audit_only`,不会真正阻止命令或 syscall。 +- `block`:只有 `action.type: deny` 的规则会尝试进入强阻断链路,其他规则仍然是 audit-only。 + +如果希望在 `mode: block` 下让某条规则只审计,建议显式配置: + +```yaml +action: + type: audit +``` + ##### Exec 阻断策略 {#inputs.proc.ai_agent.enforcement.strategy} **标签**: @@ -2443,11 +2464,8 @@ inputs: **枚举可选值**: | Value | Note | | ----- | ---------------------------- | -| auto | | -| lsm_only | | -| override_only | | -| sigkill_only | | -| audit_only | | +| auto | 推荐值 | +| lsm_only | 仅启用 path-only LSM exec 阻断 | **模式**: | Key | Value | @@ -2456,7 +2474,17 @@ inputs: **详细描述**: -exec 命令阻断的机制选择。exec 阻断在可用时使用 BPF LSM。 +path-only exec 
阻断的机制选择。 + +当前强阻断行为: + +- `auto`:在允许且可用时,path-only exec 规则走 BPF LSM。 +- `lsm_only`:path-only exec 规则必须走 LSM。 + +说明: + +- 带 `argv` 条件的 exec 阻断依赖 `allowed_mechanisms` 包含 `kprobe_override` 且 `syscall_strategy` 允许 override。 +- `override_only`、`sigkill_only`、`audit_only` 等历史值当前仅为兼容保留,不建议新配置使用。 ##### Syscall 阻断策略 {#inputs.proc.ai_agent.enforcement.syscall_strategy} @@ -2481,11 +2509,8 @@ inputs: **枚举可选值**: | Value | Note | | ----- | ---------------------------- | -| auto | | -| lsm_only | | -| override_only | | -| sigkill_only | | -| audit_only | | +| auto | 推荐值 | +| override_only | 通过 kprobe override 处理 direct syscall 阻断 | **模式**: | Key | Value | @@ -2494,7 +2519,14 @@ inputs: **详细描述**: -直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 +直接 syscall 阻断的机制选择。 + +当前强阻断行为只有两种有效写法: + +- `auto` +- `override_only` + +两者当前都会走 kprobe override。`lsm_only`、`sigkill_only`、`audit_only` 等历史值仅为兼容保留,不建议新配置使用。 ##### 允许的阻断机制 {#inputs.proc.ai_agent.enforcement.allowed_mechanisms} @@ -2516,17 +2548,13 @@ inputs: allowed_mechanisms: - lsm - kprobe_override - - sigkill - - seccomp ``` **枚举可选值**: | Value | Note | | ----- | ---------------------------- | -| lsm | | -| kprobe_override | | -| sigkill | | -| seccomp | | +| lsm | path-only exec 强阻断 | +| kprobe_override | argv-qualified exec 和 direct syscall 强阻断 | **模式**: | Key | Value | @@ -2535,7 +2563,17 @@ inputs: **详细描述**: -配置允许使用的阻断机制。只有列表包含 kprobe_override 且运行时能力探测确认支持时,才会尝试使用 bpf_override_return。 +配置允许使用的阻断机制。 + +当前实现真正消费的只有: + +- `lsm` +- `kprobe_override` + +说明: + +- 只有列表包含 `kprobe_override` 且运行时能力探测确认支持时,才会尝试使用 `bpf_override_return`。 +- `sigkill`、`seccomp` 当前只是兼容保留值,不参与现有内核阻断链路。 ##### 默认降级动作 {#inputs.proc.ai_agent.enforcement.default_fallback} @@ -2567,6 +2605,10 @@ inputs: | ---- | ---------------------------- | | Type | string | +**详细描述**: + +兼容保留字段。当前内核阻断链路不会消费该值,建议保持默认。 + ##### 最大规则数 {#inputs.proc.ai_agent.enforcement.max_rules} **标签**: @@ -2591,7 +2633,21 @@ inputs: | Key | Value | | ---- 
| ---------------------------- | | Type | int | -| Range | [0, 1024] | +| Range | [0, 256] | + +**详细描述**: + +编译后的 exec BPF record 最大条数,不是顶层规则对象数量。 + +说明: + +- 每个 `exec.exact` 会占用 1 条 exec BPF record。 +- 每个 `exec.suffix` 会占用 1 条 exec BPF record。 +- 如果配置了 `argv_matches`,还会按 `argv_matches` 数量放大。 +- 只审计规则如果最终会编译成 `exact` / `suffix` / `argv_matches` 形式的 exec record,也会计入这里。 +- 纯 `exec.prefix` / `exec.argv_contains_any` 审计规则只在用户态匹配,不消耗 exec BPF record。 +- 当前 exec 强阻断上限是 `256` 条编译后的 record。 +- direct syscall 路径使用单独的固定 map,并且支持的 syscall 集更小。 ##### 规则 {#inputs.proc.ai_agent.enforcement.rules} @@ -2616,11 +2672,145 @@ inputs: **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | dict | +| Type | list | **详细描述**: -AI Agent 执行阻断规则。强阻断 exec 规则支持 path exact/suffix 选择器,并可通过 argv_matches 指定固定 argv index 和 exact value。argv_contains_any 仅作为审计兼容字段,不支持强阻断。syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 +AI Agent 执行阻断规则。 + +当前真正生效的字段包括: + +- `scope`:仅支持 `ai_agent_tree` +- `target_type`:`exec` 或 `syscall` +- `action.type`:建议使用 `deny` 和 `audit`;只有 `deny` 会进入强阻断链路,其他值都会保持 audit-only +- `action.errno`:兼容保留字段;当前 BPF 阻断固定使用 `EPERM` +- `audit`:兼容保留字段;它不是行为开关,规则命中后仍会输出 `proc_block_event` +- `exec.exact` / `exec.suffix`:强阻断选择器 +- `exec.prefix`:仅用户态审计选择器;4.18 上不参与强阻断 +- `exec.argv_matches`:强阻断只支持 `index: 0..3`、`op: exact`,并且必须搭配 `exact` 或 `suffix` +- `exec.argv_contains_any`:仅用户态审计选择器,不参与强阻断 +- `syscall.names` / `syscall.symbols`:当前 direct syscall 仅支持 `reboot`、`init_module`、`finit_module`、`delete_module`、`kexec_load`,并且无论阻断还是审计都依赖 `kprobe_override` 支持 + +推荐示例: + +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: true + mode: block + strategy: auto + syscall_strategy: override_only + allowed_mechanisms: + - lsm + - kprobe_override + max_rules: 256 + rules: + - id: block-uname + scope: ai_agent_tree + target_type: exec + action: + type: deny + errno: EPERM + audit: true + exec: + exact: + - /usr/bin/uname + - /bin/uname 
+ suffix: + - /uname + - id: block-systemctl-reboot + scope: ai_agent_tree + target_type: exec + action: + type: deny + errno: EPERM + audit: true + exec: + exact: + - /usr/bin/systemctl + - /bin/systemctl + argv_matches: + - index: 1 + op: exact + value: reboot + - id: block-direct-reboot + scope: ai_agent_tree + target_type: syscall + action: + type: deny + errno: EPERM + audit: true + syscall: + names: + - reboot + symbols: + - __x64_sys_reboot +``` + +上面的示例一共占用 5 条编译后的 exec BPF record: + +- `block-uname`:2 个 `exact` + 1 个 `suffix` = 3 +- `block-systemctl-reboot`:2 个 `exact` * 1 个 `argv_matches` = 2 + +只审计示例: + +1. 如果希望所有规则都只审计,直接使用全局 `mode: audit_only`: + +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: true + mode: audit_only + rules: + - id: audit-uname + scope: ai_agent_tree + target_type: exec + exec: + exact: + - /usr/bin/uname + - /bin/uname +``` + +2. 如果希望一部分规则阻断、另一部分规则只审计,保持 `mode: block`,并对只审计的规则显式写 `action.type: audit`: + +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: true + mode: block + rules: + - id: audit-uname + scope: ai_agent_tree + target_type: exec + action: + type: audit + exec: + exact: + - /usr/bin/uname + - /bin/uname + - id: block-direct-reboot + scope: ai_agent_tree + target_type: syscall + action: + type: deny + errno: EPERM + syscall: + names: + - reboot + symbols: + - __x64_sys_reboot +``` + +说明: + +- `action.type: audit` 的规则仍会输出 `proc_block_event`,但 `guarantee` 会是 `audit_only`。 +- `audit: true/false` 不是“只审计”开关,不决定阻断行为。 ### 符号表 {#inputs.proc.symbol_table} diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 45f2a48783a..126d3930cb1 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -2393,7 +2393,7 @@ Whether to enable AI Agent file IO event collection. #### Enforcement {#inputs.proc.ai_agent.enforcement} -AI Agent command and selected direct syscall execution enforcement. +AI Agent command and selected direct syscall audit/block enforcement. 
##### Enabled {#inputs.proc.ai_agent.enforcement.enabled} @@ -2420,6 +2420,13 @@ inputs: | ---- | ---------------------------- | | Type | bool | +**Description**: + +Enable the AI Agent enforcement pipeline. + +- With `mode: audit_only`, it emits audit events only and does not prevent execution. +- With `mode: block`, matched `deny` rules may actually prevent execution. + ##### Mode {#inputs.proc.ai_agent.enforcement.mode} **Tags**: @@ -2451,6 +2458,20 @@ inputs: | ---- | ---------------------------- | | Type | string | +**Description**: + +Global enforcement mode. + +- `audit_only`: all matching rules emit `proc_block_event` with `guarantee=audit_only`; no command or syscall is prevented. +- `block`: only rules with `action.type: deny` attempt strong block; the rest remain audit-only. + +If you want a specific rule to stay audit-only under `mode: block`, use: + +```yaml +action: + type: audit +``` + ##### Strategy {#inputs.proc.ai_agent.enforcement.strategy} **Tags**: @@ -2474,11 +2495,8 @@ inputs: **Enum options**: | Value | Note | | ----- | ---------------------------- | -| auto | | -| lsm_only | | -| override_only | | -| sigkill_only | | -| audit_only | | +| auto | Recommended | +| lsm_only | Enable only path-only LSM exec blocking | **Schema**: | Key | Value | @@ -2487,7 +2505,17 @@ inputs: **Description**: -Enforcement mechanism selection for exec command blocking. Exec blocking uses BPF LSM when available. +Enforcement mechanism selection for path-only exec blocking. + +Current strong-block behavior: + +- `auto`: use BPF LSM for path-only exec rules when allowed and available. +- `lsm_only`: require the LSM path for path-only exec rules. + +Notes: + +- argv-qualified exec blocking requires `allowed_mechanisms` to contain `kprobe_override` and `syscall_strategy` to allow override, because it uses the standalone kprobe override path. +- Historical values such as `override_only`, `sigkill_only`, and `audit_only` are compatibility leftovers and are not recommended for new configs.
##### Syscall Strategy {#inputs.proc.ai_agent.enforcement.syscall_strategy} @@ -2512,11 +2540,8 @@ inputs: **Enum options**: | Value | Note | | ----- | ---------------------------- | -| auto | | -| lsm_only | | -| override_only | | -| sigkill_only | | -| audit_only | | +| auto | Recommended | +| override_only | Enable direct-syscall blocking through kprobe override | **Schema**: | Key | Value | @@ -2525,7 +2550,14 @@ inputs: **Description**: -Mechanism selection for direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. +Mechanism selection for direct syscall blocking. + +Only two values currently take effect for strong block: + +- `auto` +- `override_only` + +Both currently map to the kprobe override implementation. Historical values such as `lsm_only`, `sigkill_only`, and `audit_only` are compatibility leftovers and are not recommended for new configs. ##### Allowed Mechanisms {#inputs.proc.ai_agent.enforcement.allowed_mechanisms} @@ -2547,17 +2579,13 @@ inputs: allowed_mechanisms: - lsm - kprobe_override - - sigkill - - seccomp ``` **Enum options**: | Value | Note | | ----- | ---------------------------- | -| lsm | | -| kprobe_override | | -| sigkill | | -| seccomp | | +| lsm | Path-only exec strong block | +| kprobe_override | argv-qualified exec and direct-syscall strong block | **Schema**: | Key | Value | @@ -2566,7 +2594,17 @@ inputs: **Description**: -Mechanisms allowed by configuration. kprobe_override is only attempted when this list contains it and runtime capability probing confirms support. +Mechanisms allowed by configuration. + +Current implementation consumes only: + +- `lsm` +- `kprobe_override` + +Notes: + +- `kprobe_override` is attempted only when this list contains it and runtime capability probing confirms support. +- `sigkill` and `seccomp` are compatibility leftovers and are not consumed by the current kernel blocking path.
##### Default Fallback {#inputs.proc.ai_agent.enforcement.default_fallback} @@ -2598,6 +2636,10 @@ inputs: | ---- | ---------------------------- | | Type | string | +**Description**: + +Reserved compatibility field. The current kernel blocking path does not consume this value. Keep the default. + ##### Max Rules {#inputs.proc.ai_agent.enforcement.max_rules} **Tags**: @@ -2622,7 +2664,21 @@ inputs: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [0, 1024] | +| Range | [0, 256] | + +**Description**: + +Maximum compiled exec BPF record count, not the number of top-level rule objects. + +Notes: + +- Each `exec.exact` entry consumes 1 exec BPF record. +- Each `exec.suffix` entry consumes 1 exec BPF record. +- If `argv_matches` is present, the record count is multiplied by the number of `argv_matches`. +- Audit-only rules still count when they compile into `exact` / `suffix` / `argv_matches`-backed exec records. +- Pure `exec.prefix` / `exec.argv_contains_any` audit rules stay in user space and do not consume exec BPF records. +- Current exec strong-block cap is `256` compiled records. +- The direct-syscall path uses a separate fixed map and a smaller supported syscall set. ##### Rules {#inputs.proc.ai_agent.enforcement.rules} @@ -2647,11 +2703,145 @@ inputs: **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | dict | +| Type | list | **Description**: -AI Agent enforcement rules. Strong-block exec rules support path exact/suffix selectors and optional argv_matches selectors with fixed argv index and exact value. argv_contains_any is audit-only compatibility and is not accepted for strong block. Syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. +AI Agent enforcement rules. + +Current effective rule fields: + +- `scope`: only `ai_agent_tree` is supported. +- `target_type`: `exec` or `syscall`. 
+- `action.type`: recommended values are `deny` and `audit`; only `deny` enters the strong-block path and other values remain audit-only. +- `action.errno`: reserved compatibility field. Current BPF blocking uses `EPERM`. +- `audit`: reserved compatibility field. It does not switch behavior, and matched rules still emit `proc_block_event`. +- `exec.exact` / `exec.suffix`: strong-block selectors. +- `exec.prefix`: user-space audit-only selector; not used for strong block on 4.18. +- `exec.argv_matches`: strong block supports only `index: 0..3`, `op: exact`, and it must be combined with `exact` or `suffix` path selectors. +- `exec.argv_contains_any`: user-space audit-only selector; not accepted for strong block. +- `syscall.names` / `syscall.symbols`: current direct-syscall support is limited to `reboot`, `init_module`, `finit_module`, `delete_module`, and `kexec_load`, and both block and audit-only syscall handling still rely on `kprobe_override`. + +Recommended example: + +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: true + mode: block + strategy: auto + syscall_strategy: override_only + allowed_mechanisms: + - lsm + - kprobe_override + max_rules: 256 + rules: + - id: block-uname + scope: ai_agent_tree + target_type: exec + action: + type: deny + errno: EPERM + audit: true + exec: + exact: + - /usr/bin/uname + - /bin/uname + suffix: + - /uname + - id: block-systemctl-reboot + scope: ai_agent_tree + target_type: exec + action: + type: deny + errno: EPERM + audit: true + exec: + exact: + - /usr/bin/systemctl + - /bin/systemctl + argv_matches: + - index: 1 + op: exact + value: reboot + - id: block-direct-reboot + scope: ai_agent_tree + target_type: syscall + action: + type: deny + errno: EPERM + audit: true + syscall: + names: + - reboot + symbols: + - __x64_sys_reboot +``` + +The example above consumes 5 compiled exec BPF records: + +- `block-uname`: 2 `exact` + 1 `suffix` = 3 +- `block-systemctl-reboot`: 2 `exact` * 1 `argv_matches` = 2 + 
+Audit-only examples: + +1. If all rules should audit only, use the global `mode: audit_only`: + +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: true + mode: audit_only + rules: + - id: audit-uname + scope: ai_agent_tree + target_type: exec + exec: + exact: + - /usr/bin/uname + - /bin/uname +``` + +2. If some rules should block and others should only audit, keep `mode: block` and set `action.type: audit` on the audit-only rule: + +```yaml +inputs: + proc: + ai_agent: + enforcement: + enabled: true + mode: block + rules: + - id: audit-uname + scope: ai_agent_tree + target_type: exec + action: + type: audit + exec: + exact: + - /usr/bin/uname + - /bin/uname + - id: block-direct-reboot + scope: ai_agent_tree + target_type: syscall + action: + type: deny + errno: EPERM + syscall: + names: + - reboot + symbols: + - __x64_sys_reboot +``` + +Notes: + +- A rule with `action.type: audit` still emits `proc_block_event`, but `guarantee` will be `audit_only`. +- `audit: true/false` is not the switch for audit-only behavior and does not control blocking. ### Symbol Table {#inputs.proc.symbol_table} diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index cc501fbbbd8..281211d0e07 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1727,20 +1727,25 @@ inputs: # type: section # name: # en: Enforcement - # ch: 执行阻断 + # ch: 执行治理 # description: - # en: AI Agent command and selected direct syscall execution enforcement. - # ch: AI Agent 命令和部分直接 syscall 执行阻断。 + # en: AI Agent command and selected direct syscall audit/block enforcement. + # ch: AI Agent 命令和部分直接 syscall 的审计/阻断。 enforcement: # type: bool # name: # en: Enabled - # ch: 开启执行阻断 + # ch: 开启执行治理 # unit: # range: [] # enum_options: [] # modification: hot_update # ee_feature: true + # description: + # en: |- + # Enable the AI Agent enforcement pipeline. 
In audit_only mode it emits audit events only; in block mode deny rules may prevent execution. + # ch: |- + # 开启 AI Agent enforcement 链路。mode=audit_only 时只输出审计事件;mode=block 时 deny 规则可能真正阻止执行。 enabled: false # type: string # name: @@ -1751,6 +1756,17 @@ inputs: # enum_options: [audit_only, block] # modification: hot_update # ee_feature: true + # description: + # en: |- + # Global enforcement mode. + # - audit_only: all matching rules emit proc_block_event with guarantee=audit_only; no command or syscall is prevented. + # - block: rules whose action.type is deny attempt strong block; rules with any other action.type remain audit-only. + # Use action.type: audit explicitly when you want a per-rule audit-only behavior under mode=block. + # ch: |- + # 全局 enforcement 模式。 + # - audit_only:所有命中规则都只输出 proc_block_event(guarantee=audit_only),不会真正阻止命令或 syscall。 + # - block:只有 action.type 为 deny 的规则会尝试进入强阻断链路,其他规则仍为 audit-only。 + # 如果希望在 mode=block 下让某条规则只审计,建议显式配置 action.type: audit。 mode: audit_only # type: string # name: @@ -1758,14 +1774,18 @@ inputs: # ch: Exec 阻断策略 # unit: # range: [] - # enum_options: [auto, lsm_only, override_only, sigkill_only, audit_only] + # enum_options: [auto, lsm_only] # modification: hot_update # ee_feature: true # description: # en: |- - # Enforcement mechanism selection for exec command blocking. Exec blocking uses BPF LSM when available. + # Enforcement mechanism selection for path-only exec blocking. Current strong block support uses BPF LSM for exact/suffix path rules. + # argv-qualified exec blocking still depends on allowed_mechanisms containing kprobe_override and syscall_strategy allowing override. + # Other legacy values are reserved for compatibility and are not recommended. 
# ch: |- - # exec 命令阻断的机制选择。exec 阻断在可用时使用 BPF LSM。 + # path-only exec 阻断的机制选择。当前强阻断仅通过 BPF LSM 处理 exact/suffix 路径规则。 + # 带 argv 条件的 exec 阻断仍依赖 allowed_mechanisms 包含 kprobe_override 且 syscall_strategy 允许 override。 + # 其他历史值仅为兼容保留,不建议配置。 strategy: auto # type: string # name: @@ -1773,14 +1793,16 @@ inputs: # ch: Syscall 阻断策略 # unit: # range: [] - # enum_options: [auto, lsm_only, override_only, sigkill_only, audit_only] + # enum_options: [auto, override_only] # modification: hot_update # ee_feature: true # description: # en: |- - # Mechanism selection for direct syscall blocking. kprobe override requires CONFIG_BPF_KPROBE_OVERRIDE and an error-injectable kernel function. + # Mechanism selection for direct syscall blocking. Current strong block support uses kprobe override only. + # Other legacy values are reserved for compatibility and are not recommended. # ch: |- - # 直接 syscall 阻断的机制选择。kprobe override 需要 CONFIG_BPF_KPROBE_OVERRIDE,并且目标内核函数必须支持 error injection。 + # 直接 syscall 阻断的机制选择。当前强阻断仅支持 kprobe override。 + # 其他历史值仅为兼容保留,不建议配置。 syscall_strategy: auto # type: string # name: @@ -1788,15 +1810,17 @@ inputs: # ch: 允许的阻断机制 # unit: # range: [] - # enum_options: [lsm, kprobe_override, sigkill, seccomp] + # enum_options: [lsm, kprobe_override] # modification: hot_update # ee_feature: true # description: # en: |- - # Mechanisms allowed by configuration. kprobe_override is only attempted when this list contains it and runtime capability probing confirms support. + # Mechanisms allowed by configuration. Current implementation uses lsm for path-only exec rules and kprobe_override for argv-qualified exec rules and selected direct syscalls. + # sigkill/seccomp are reserved compatibility values and are not consumed by the current kernel blocking path. 
# ch: |- - # 配置允许使用的阻断机制。只有列表包含 kprobe_override 且运行时能力探测确认支持时,才会尝试使用 bpf_override_return。 - allowed_mechanisms: [lsm, kprobe_override, sigkill, seccomp] + # 配置允许使用的阻断机制。当前实现使用 lsm 处理 path-only exec 规则,使用 kprobe_override 处理 argv-qualified exec 规则和部分 direct syscall。 + # sigkill/seccomp 仅为兼容保留值,当前内核阻断链路不会消费。 + allowed_mechanisms: [lsm, kprobe_override] # type: string # name: # en: Default Fallback @@ -1806,18 +1830,34 @@ inputs: # enum_options: [sigkill] # modification: hot_update # ee_feature: true + # description: + # en: |- + # Reserved compatibility field. The current kernel blocking path does not consume this value; keep the default. + # ch: |- + # 兼容保留字段。当前内核阻断链路不会消费该值,建议保持默认。 default_fallback: sigkill # type: int # name: # en: Max Rules # ch: 最大规则数 # unit: - # range: [0, 1024] + # range: [0, 256] # enum_options: [] # modification: hot_update # ee_feature: true + # description: + # en: |- + # Maximum compiled exec BPF record count, not the number of top-level rule objects. + # Each exact/suffix path selector consumes one exec record; if argv_matches is present, the record count is multiplied by the number of argv_matches. + # Audit-only rules still count when they compile into exact/suffix/argv_matches-backed exec records; pure prefix/argv_contains_any audit rules stay in user space and do not consume exec BPF records. + # Current exec strong-block cap is 256 compiled records. The direct-syscall path still uses a smaller fixed map and a fixed supported syscall set. 
+ # ch: |- + # 编译后的 exec BPF record 最大条数,不是顶层规则对象数量。 + # 每个 exact/suffix 路径选择器会占用 1 条 exec record;如果配置了 argv_matches,还会按 argv_matches 数量再放大。 + # 只审计规则如果会编译成 exact/suffix/argv_matches 形式的 exec record,也会计入这里;纯 prefix/argv_contains_any 审计规则留在用户态匹配,不消耗 exec BPF record。 + # 当前 exec 强阻断上限是 256 条编译后的 record;direct syscall 路径仍使用更小的固定 map 和固定 syscall 集。 max_rules: 256 - # type: dict + # type: list # name: # en: Rules # ch: 规则 @@ -1828,9 +1868,31 @@ inputs: # ee_feature: true # description: # en: |- - # AI Agent enforcement rules. Strong-block exec rules support path exact/suffix selectors and optional argv_matches selectors with fixed argv index and exact value. argv_contains_any is audit-only compatibility and is not accepted for strong block. Syscall rules support selected dangerous syscall names or kernel symbols such as reboot, init_module, finit_module, delete_module and kexec_load. + # AI Agent enforcement rules. + # Current effective fields: + # - scope: only ai_agent_tree is supported + # - target_type: exec or syscall + # - action.type: recommended values are deny and audit; only deny enters the strong-block path, other values remain audit-only + # - action.errno: reserved compatibility field; current BPF block path uses EPERM + # - audit: reserved compatibility field; it does not switch behavior, and matched rules still emit proc_block_event + # - exec.exact / exec.suffix: strong-block selectors + # - exec.prefix: user-space audit-only selector; not used for strong block on 4.18 + # - exec.argv_matches: strong block supports index 0..3, op exact, and requires exact/suffix path selectors + # - exec.argv_contains_any: user-space audit-only selector; not accepted for strong block + # - syscall.names / syscall.symbols: current direct-syscall support is limited to reboot, init_module, finit_module, delete_module and kexec_load, and both block/audit-only syscall handling still rely on kprobe_override support # ch: |- - # AI Agent 执行阻断规则。强阻断 exec 规则支持 path exact/suffix 
选择器,并可通过 argv_matches 指定固定 argv index 和 exact value。argv_contains_any 仅作为审计兼容字段,不支持强阻断。syscall 规则支持部分危险 syscall 名称或内核符号,例如 reboot、init_module、finit_module、delete_module、kexec_load。 + # AI Agent 执行阻断规则。 + # 当前真正生效的字段包括: + # - scope:仅支持 ai_agent_tree + # - target_type:exec 或 syscall + # - action.type:建议使用 deny 和 audit;只有 deny 会进入强阻断链路,其他值都会保持 audit-only + # - action.errno:兼容保留字段;当前 BPF 阻断路径固定使用 EPERM + # - audit:兼容保留字段;它不是行为开关,规则命中后仍会输出 proc_block_event + # - exec.exact / exec.suffix:强阻断选择器 + # - exec.prefix:仅用户态审计选择器;4.18 上不参与强阻断 + # - exec.argv_matches:强阻断只支持 index 0..3、op exact,且必须搭配 exact/suffix 路径 + # - exec.argv_contains_any:仅用户态审计选择器,不参与强阻断 + # - syscall.names / syscall.symbols:当前 direct syscall 仅支持 reboot、init_module、finit_module、delete_module、kexec_load,且无论阻断还是审计都依赖 kprobe_override 支持 rules: [] # type: section # name: From 02f664bd817cefffabcb0ff2a56dd376dea85a92 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 20 May 2026 18:59:45 +0800 Subject: [PATCH 19/24] feat(agent): support ai agent process matcher recognition --- agent/crates/enterprise-utils/src/lib.rs | 13 ++ agent/src/config/config.rs | 20 +++ agent/src/utils/process/linux.rs | 205 ++++++++++++++++++++++- server/agent_config/README-CH.md | 7 +- server/agent_config/README.md | 9 +- server/agent_config/template.yaml | 17 +- 6 files changed, 259 insertions(+), 12 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index e1b9b4bd318..fc5a09de8f5 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -672,6 +672,15 @@ pub mod ai_agent { false } + pub fn register_process_matcher( + &self, + _pid: u32, + _process_name: &str, + _now: Duration, + ) -> bool { + false + } + pub fn is_ai_agent(&self, _pid: u32) -> bool { false } @@ -692,6 +701,10 @@ pub mod ai_agent { vec![] } + pub fn remove_process_matcher_root(&self, _root_pid: u32) -> Vec { + vec![] + } + pub fn len(&self) -> usize { 0 } diff --git 
a/agent/src/config/config.rs b/agent/src/config/config.rs index 743506789f8..0fcfc0a91b0 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -3676,6 +3676,7 @@ pub const OS_PROC_REGEXP_MATCH_TYPE_PROC_NAME: &'static str = "process_name"; pub const OS_PROC_REGEXP_MATCH_TYPE_PARENT_PROC_NAME: &'static str = "parent_process_name"; pub const OS_PROC_REGEXP_MATCH_TYPE_TAG: &'static str = "tag"; pub const OS_PROC_REGEXP_MATCH_TYPE_CMD_WITH_ARGS: &'static str = "cmdline_with_args"; +pub const OS_PROC_ENABLED_FEATURE_AI_AGENT: &str = "proc.ai_agent"; // use for proc scan match and replace #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Default)] @@ -4306,6 +4307,25 @@ enabled: true assert_eq!(proc.process_matcher, Proc::default().process_matcher); } + #[test] + fn parse_proc_config_with_ai_agent_feature() { + let yaml = r#" +process_matcher: +- match_regex: ^python3$ + match_type: process_name + enabled_features: [proc.ai_agent, proc.gprocess_info] +"#; + let proc: Proc = serde_yaml::from_str(yaml).unwrap(); + + assert_eq!( + proc.process_matcher[0].enabled_features, + vec![ + OS_PROC_ENABLED_FEATURE_AI_AGENT.to_string(), + "proc.gprocess_info".to_string(), + ] + ); + } + #[test] fn java_classpath_process_matcher_rewrites_to_class_name() { let matcher = &Proc::default().process_matcher[1]; diff --git a/agent/src/utils/process/linux.rs b/agent/src/utils/process/linux.rs index 9f487d3b356..e3b0aabd5ef 100644 --- a/agent/src/utils/process/linux.rs +++ b/agent/src/utils/process/linux.rs @@ -30,12 +30,15 @@ use std::{ time::Duration, }; +#[cfg(feature = "enterprise")] +use std::time::{SystemTime, UNIX_EPOCH}; + use log::{debug, error, info, trace}; use nix::sys::utsname::uname; use procfs::process::all_processes_with_root; use crate::common::flow::BIZ_TYPE_AI_AGENT; -use crate::config::ProcessMatcher; +use crate::config::{config::OS_PROC_ENABLED_FEATURE_AI_AGENT, ProcessMatcher}; use crate::platform::{get_os_app_tag_by_exec, ProcessData, 
ProcessDataOp}; //返回当前进程占用内存RSS单位(字节) @@ -369,7 +372,7 @@ impl ProcessListener { } for (feature, mut node) in current.features.drain() { - if node.callback.is_some() { + if node.callback.is_some() || feature == OS_PROC_ENABLED_FEATURE_AI_AGENT { node.process_matcher.clear(); features.insert(feature, node); } @@ -477,6 +480,7 @@ impl ProcessListener { process_data_cache.retain(|pid, _| alive_pids.contains(pid)); for (key, value) in features.iter_mut() { + let is_ai_agent_matcher_feature = key.as_str() == OS_PROC_ENABLED_FEATURE_AI_AGENT; let ai_agent_pids = if should_fetch_ai_agent_pids(key.as_str(), value.callback.is_none()) { fetch_ai_agent_pids(key.as_str()) @@ -484,12 +488,14 @@ impl ProcessListener { Vec::new() }; - if should_skip_feature( - value.process_matcher.is_empty(), - value.pids.is_empty(), - ai_agent_pids.is_empty(), - value.callback.is_none(), - ) { + if !is_ai_agent_matcher_feature + && should_skip_feature( + value.process_matcher.is_empty(), + value.pids.is_empty(), + ai_agent_pids.is_empty(), + value.callback.is_none(), + ) + { continue; } @@ -527,6 +533,21 @@ impl ProcessListener { process_datas.sort_by_key(|x| x.pid); process_datas.merge_and_dedup(); + if is_ai_agent_matcher_feature { + #[cfg(feature = "enterprise")] + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + sync_ai_agent_process_matcher_registry( + registry.as_ref(), + &value.pids, + &process_datas, + ai_agent_registry_now(), + ); + } + value.pids = pids; + value.process_datas = process_datas; + continue; + } + if pids != value.pids || process_datas != value.process_datas { debug!("Feature {} update {} pids {:?}.", key, pids.len(), pids); value.callback.as_ref().unwrap()(&pids, &process_datas); @@ -603,6 +624,50 @@ fn should_fetch_ai_agent_pids(feature: &str, callback_missing: bool) -> bool { feature == "proc.gprocess_info" || feature == "proc.socket_list" } +#[cfg(feature = "enterprise")] +trait AiAgentProcessMatcherRegistry { + fn 
register_process_matcher(&self, pid: u32, process_name: &str, now: Duration) -> bool; + fn remove_process_matcher_root(&self, root_pid: u32) -> Vec; +} + +#[cfg(feature = "enterprise")] +impl AiAgentProcessMatcherRegistry for enterprise_utils::ai_agent::AiAgentRegistry { + fn register_process_matcher(&self, pid: u32, process_name: &str, now: Duration) -> bool { + self.register_process_matcher(pid, process_name, now) + } + + fn remove_process_matcher_root(&self, root_pid: u32) -> Vec { + self.remove_process_matcher_root(root_pid) + } +} + +#[cfg(feature = "enterprise")] +fn ai_agent_registry_now() -> Duration { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() +} + +#[cfg(feature = "enterprise")] +fn sync_ai_agent_process_matcher_registry( + registry: &R, + previous_pids: &[u32], + process_datas: &[ProcessData], + now: Duration, +) { + let current_pids: HashSet = process_datas.iter().map(|pd| pd.pid as u32).collect(); + + for process_data in process_datas { + registry.register_process_matcher(process_data.pid as u32, &process_data.process_name, now); + } + + for pid in previous_pids { + if !current_pids.contains(pid) { + registry.remove_process_matcher_root(*pid); + } + } +} + #[cfg(feature = "enterprise")] fn fetch_ai_agent_pids(feature: &str) -> Vec { // AI Agent processes must participate in both gprocess_info and socket_list. 
@@ -662,6 +727,9 @@ mod tests { use super::*; use std::time::Duration; + #[cfg(feature = "enterprise")] + use std::sync::Mutex; + fn make_process_data(pid: u64) -> ProcessData { ProcessData { name: format!("proc-{pid}"), @@ -741,4 +809,125 @@ mod tests { assert!(!should_fetch_ai_agent_pids("proc.process_info", false)); assert!(!should_fetch_ai_agent_pids("proc.socket_list", true)); } + + #[test] + fn set_preserves_ai_agent_node_for_reconcile_after_matcher_removal() { + let matcher = ProcessMatcher { + enabled_features: vec![OS_PROC_ENABLED_FEATURE_AI_AGENT.to_string()], + ..Default::default() + }; + let listener = ProcessListener::new( + &Vec::new(), + &vec![matcher], + "/proc".to_string(), + String::new(), + Vec::new(), + ); + + { + let mut features = listener.features.write().unwrap(); + let node = features + .features + .get_mut(OS_PROC_ENABLED_FEATURE_AI_AGENT) + .expect("ai agent feature node missing"); + node.pids = vec![4001]; + node.process_datas = vec![make_process_data(4001)]; + } + + listener.set(&Vec::new(), &Vec::new()); + + let features = listener.features.read().unwrap(); + let node = features + .features + .get(OS_PROC_ENABLED_FEATURE_AI_AGENT) + .expect("ai agent feature node should be preserved for reconcile"); + assert!(node.process_matcher.is_empty()); + assert_eq!(node.pids, vec![4001]); + assert_eq!(node.process_datas, vec![make_process_data(4001)]); + } + + #[cfg(feature = "enterprise")] + #[derive(Default)] + struct FakeAiAgentRegistry { + endpoint_pids: Mutex>, + process_matcher_pids: Mutex>, + } + + #[cfg(feature = "enterprise")] + impl FakeAiAgentRegistry { + fn register_endpoint(&self, pid: u32) -> bool { + self.endpoint_pids.lock().unwrap().insert(pid) + } + + fn is_ai_agent(&self, pid: u32) -> bool { + self.endpoint_pids.lock().unwrap().contains(&pid) + || self.process_matcher_pids.lock().unwrap().contains(&pid) + } + } + + #[cfg(feature = "enterprise")] + impl AiAgentProcessMatcherRegistry for FakeAiAgentRegistry { + fn 
register_process_matcher(&self, pid: u32, _process_name: &str, _now: Duration) -> bool { + self.process_matcher_pids.lock().unwrap().insert(pid) + } + + fn remove_process_matcher_root(&self, root_pid: u32) -> Vec { + let removed = self.process_matcher_pids.lock().unwrap().remove(&root_pid); + if removed && !self.endpoint_pids.lock().unwrap().contains(&root_pid) { + return vec![root_pid]; + } + Vec::new() + } + } + + #[cfg(feature = "enterprise")] + #[test] + fn sync_ai_agent_process_matcher_registry_registers_matches() { + let registry = FakeAiAgentRegistry::default(); + let process_datas = vec![make_process_data(3001)]; + + sync_ai_agent_process_matcher_registry( + ®istry, + &[], + &process_datas, + Duration::from_secs(1), + ); + + assert!(registry.is_ai_agent(3001)); + } + + #[cfg(feature = "enterprise")] + #[test] + fn sync_ai_agent_process_matcher_registry_removes_missing_matches() { + let registry = FakeAiAgentRegistry::default(); + let process_datas = vec![make_process_data(3002)]; + + sync_ai_agent_process_matcher_registry( + ®istry, + &[], + &process_datas, + Duration::from_secs(1), + ); + sync_ai_agent_process_matcher_registry(®istry, &[3002], &[], Duration::from_secs(2)); + + assert!(!registry.is_ai_agent(3002)); + } + + #[cfg(feature = "enterprise")] + #[test] + fn sync_ai_agent_process_matcher_registry_preserves_endpoint_sources() { + let registry = FakeAiAgentRegistry::default(); + let process_datas = vec![make_process_data(3003)]; + + assert!(registry.register_endpoint(3003)); + sync_ai_agent_process_matcher_registry( + ®istry, + &[], + &process_datas, + Duration::from_secs(2), + ); + sync_ai_agent_process_matcher_registry(®istry, &[3003], &[], Duration::from_secs(3)); + + assert!(registry.is_ai_agent(3003)); + } } diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index 0a6b3adf040..a58dea557bf 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -1937,6 +1937,7 @@ rewrite_name 
可定义为正则表达式捕获组索引,或 windows 风格的 - ignore: 是否要忽略匹配到的进程,缺省值为 `false` - rewrite_name: 使用正则替换匹配到的进程名或命令行,缺省值为 `""` 表示不做替换。 - enabled_features: 为匹配到的进程开启的特性列表,可选项如下 + - proc.ai_agent(仅负责 AI Agent 识别,不会隐式开启 `proc.gprocess_info` 或 `proc.socket_list`) - proc.gprocess_info(注意确认 `inputs.proc.enabled` 已配置为 **true**) - proc.golang_symbol_table(注意确认 `inputs.proc.symbol_table.golang_specific.enabled` 已配置为 **true**) - proc.socket_list(注意确认 `inputs.proc.socket_info_sync_interval` 已配置为**大于 0 的数字**) @@ -2243,6 +2244,7 @@ inputs: **枚举可选值**: | Value | Note | | ----- | ---------------------------- | +| proc.ai_agent | 仅负责 AI Agent 识别,不会隐式开启 `proc.gprocess_info` 或 `proc.socket_list` | | proc.gprocess_info | 同步进程资源信息,并为 eBPF 原始观测数据注入所在观测点上的进程标签 | | proc.golang_symbol_table | 解析 Golang 特有符号表,用于 Golang 进程裁剪了标准符号表时的剖析数据优化 | | proc.socket_list | 同步进程的活跃 Socket 信息,用于为应用和网络观测数据注入通信双方的进程标签 | @@ -2259,7 +2261,10 @@ inputs: **详细描述**: -注意也需要同时开启相关特性的总开关: +`proc.ai_agent` 仅负责 AI Agent 识别,不需要额外开启其他全局总开关, +但也不会隐式开启 `proc.gprocess_info` 或 `proc.socket_list`。 + +其他相关特性仍需同时开启对应的总开关: - proc.gprocess_info(注意确认 `inputs.proc.enabled` 已配置为 **true**) - proc.golang_symbol_table(注意确认 `inputs.proc.symbol_table.golang_specific.enabled` 已配置为 **true**) - proc.socket_list(注意确认 `inputs.proc.socket_info_sync_interval` 已配置为**大于 0 的数字**) diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 126d3930cb1..2af754059f3 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -1968,6 +1968,7 @@ Configuration Item: - rewrite_name: The name will replace the process name or cmd use regexp replace. Default value `""` means no replacement. - enabled_features: List of features enabled for matched processes. 
Available options: + - proc.ai_agent (Only identifies AI Agents; it does not implicitly enable `proc.gprocess_info` or `proc.socket_list`) - proc.gprocess_info (Ensure `inputs.proc.enabled` is configured to **true**) - proc.golang_symbol_table (Ensure `inputs.proc.symbol_table.golang_specific.enabled` is configured to **true**) - proc.socket_list (Ensure `inputs.proc.socket_info_sync_interval` is configured to a **number > 0**) @@ -2274,6 +2275,7 @@ inputs: **Enum options**: | Value | Note | | ----- | ---------------------------- | +| proc.ai_agent | Only identifies AI Agents; it does not implicitly enable `proc.gprocess_info` or `proc.socket_list` | | proc.gprocess_info | Synchronize process resource information and inject process tags from the observation point into raw eBPF data | | proc.golang_symbol_table | Parse Golang-specific symbol tables to optimize profiling data when Golang processes prune the standard symbol table | | proc.socket_list | Synchronize active socket information of processes to inject process labels for both peers in application and network observation data | @@ -2290,7 +2292,12 @@ inputs: **Description**: -Also ensure the global configuration parameters for related features are enabled: +`proc.ai_agent` only identifies AI Agents. It does not require another +global feature toggle, and it does not implicitly enable +`proc.gprocess_info` or `proc.socket_list`. 
+ +Other related features still require their corresponding global +configuration parameters to be enabled: - proc.gprocess_info (Ensure `inputs.proc.enabled` is configured to **true**) - proc.golang_symbol_table (Ensure `inputs.proc.symbol_table.golang_specific.enabled` is configured to **true**) - proc.socket_list (Ensure `inputs.proc.socket_info_sync_interval` is configured to a **number > 0**) diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index 281211d0e07..bbccff192af 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1339,6 +1339,7 @@ inputs: # - ignore: 是否要忽略匹配到的进程,缺省值为 `false` # - rewrite_name: 使用正则替换匹配到的进程名或命令行,缺省值为 `""` 表示不做替换。 # - enabled_features: 为匹配到的进程开启的特性列表,可选项如下 + # - proc.ai_agent(仅负责 AI Agent 识别,不会隐式开启 `proc.gprocess_info` 或 `proc.socket_list`) # - proc.gprocess_info(注意确认 `inputs.proc.enabled` 已配置为 **true**) # - proc.golang_symbol_table(注意确认 `inputs.proc.symbol_table.golang_specific.enabled` 已配置为 **true**) # - proc.socket_list(注意确认 `inputs.proc.socket_info_sync_interval` 已配置为**大于 0 的数字**) @@ -1392,6 +1393,7 @@ inputs: # - rewrite_name: The name will replace the process name or cmd use regexp replace. # Default value `""` means no replacement. # - enabled_features: List of features enabled for matched processes. 
Available options: + # - proc.ai_agent (Only identifies AI Agents; it does not implicitly enable `proc.gprocess_info` or `proc.socket_list`) # - proc.gprocess_info (Ensure `inputs.proc.enabled` is configured to **true**) # - proc.golang_symbol_table (Ensure `inputs.proc.symbol_table.golang_specific.enabled` is configured to **true**) # - proc.socket_list (Ensure `inputs.proc.socket_info_sync_interval` is configured to a **number > 0**) @@ -1578,6 +1580,9 @@ inputs: # unit: # range: [] # enum_options: + # - proc.ai_agent: + # ch: 仅负责 AI Agent 识别,不会隐式开启 `proc.gprocess_info` 或 `proc.socket_list` + # en: Only identifies AI Agents; it does not implicitly enable `proc.gprocess_info` or `proc.socket_list` # - proc.gprocess_info: # ch: 同步进程资源信息,并为 eBPF 原始观测数据注入所在观测点上的进程标签 # en: Synchronize process resource information and inject process tags from the observation point into raw eBPF data @@ -1612,7 +1617,10 @@ inputs: # ee_feature: false # description: # ch: |- - # 注意也需要同时开启相关特性的总开关: + # `proc.ai_agent` 仅负责 AI Agent 识别,不需要额外开启其他全局总开关, + # 但也不会隐式开启 `proc.gprocess_info` 或 `proc.socket_list`。 + # + # 其他相关特性仍需同时开启对应的总开关: # - proc.gprocess_info(注意确认 `inputs.proc.enabled` 已配置为 **true**) # - proc.golang_symbol_table(注意确认 `inputs.proc.symbol_table.golang_specific.enabled` 已配置为 **true**) # - proc.socket_list(注意确认 `inputs.proc.socket_info_sync_interval` 已配置为**大于 0 的数字**) @@ -1622,7 +1630,12 @@ inputs: # - ebpf.profile.off_cpu(注意确认 `inputs.ebpf.profile.off_cpu.disabled` 已配置为 **false**) # - ebpf.profile.memory(注意确认 `inputs.ebpf.profile.memory.disabled` 已配置为 **false**) # en: |- - # Also ensure the global configuration parameters for related features are enabled: + # `proc.ai_agent` only identifies AI Agents. It does not require another + # global feature toggle, and it does not implicitly enable + # `proc.gprocess_info` or `proc.socket_list`. 
+ # + # Other related features still require their corresponding global + # configuration parameters to be enabled: # - proc.gprocess_info (Ensure `inputs.proc.enabled` is configured to **true**) # - proc.golang_symbol_table (Ensure `inputs.proc.symbol_table.golang_specific.enabled` is configured to **true**) # - proc.socket_list (Ensure `inputs.proc.socket_info_sync_interval` is configured to a **number > 0**) From 507fdcdf1394e41e66c97c9e84b782776aea3bc6 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 21 May 2026 10:29:02 +0800 Subject: [PATCH 20/24] fix(agent): propagate ai agent biz type to ebpf l7 logs --- .../flow_generator/protocol_logs/parser.rs | 69 ++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/agent/src/flow_generator/protocol_logs/parser.rs b/agent/src/flow_generator/protocol_logs/parser.rs index 49247db20f7..7b22bc83b86 100644 --- a/agent/src/flow_generator/protocol_logs/parser.rs +++ b/agent/src/flow_generator/protocol_logs/parser.rs @@ -34,7 +34,7 @@ use super::{AppProtoHead, AppProtoLogsBaseInfo, BoxAppProtoLogsData}; use crate::{ common::{ ebpf::EbpfType, - flow::{L7Protocol, PacketDirection, SignalSource}, + flow::{L7Protocol, PacketDirection, SignalSource, BIZ_TYPE_AI_AGENT}, l7_protocol_info::{L7ProtocolInfo, L7ProtocolInfoInterface}, meta_packet::ProtocolData, MetaPacket, TaggedFlow, Timestamp, @@ -86,6 +86,45 @@ impl fmt::Display for MetaAppProto { } } +fn resolve_biz_type_from_processes( + biz_type: u8, + process_id_0: u32, + process_id_1: u32, + is_ai_agent: F, +) -> u8 +where + F: Fn(u32) -> bool, +{ + if biz_type != 0 { + return biz_type; + } + + if (process_id_0 != 0 && is_ai_agent(process_id_0)) + || (process_id_1 != 0 && is_ai_agent(process_id_1)) + { + return BIZ_TYPE_AI_AGENT; + } + + biz_type +} + +#[cfg(feature = "enterprise")] +fn enrich_ai_agent_biz_type(base_info: &mut AppProtoLogsBaseInfo) { + let Some(registry) = enterprise_utils::ai_agent::global_registry() else { + return; + }; + + 
base_info.biz_type = resolve_biz_type_from_processes( + base_info.biz_type, + base_info.process_id_0, + base_info.process_id_1, + |pid| registry.is_ai_agent(pid), + ); +} + +#[cfg(not(feature = "enterprise"))] +fn enrich_ai_agent_biz_type(_base_info: &mut AppProtoLogsBaseInfo) {} + impl MetaAppProto { pub fn new( flow: &TaggedFlow, @@ -197,6 +236,7 @@ impl MetaAppProto { if l7_info.is_reversed() { base_info.reverse() } + enrich_ai_agent_biz_type(&mut base_info); Some(Self { base_info, @@ -737,3 +777,30 @@ impl SessionAggregator { info!("app protocol logs parser (id={}) stopped", self.id); } } + +#[cfg(test)] +mod tests { + use super::resolve_biz_type_from_processes; + use crate::common::flow::BIZ_TYPE_AI_AGENT; + + #[test] + fn resolve_biz_type_from_processes_marks_src_ai_agent() { + assert_eq!( + resolve_biz_type_from_processes(0, 1001, 0, |pid| pid == 1001), + BIZ_TYPE_AI_AGENT + ); + } + + #[test] + fn resolve_biz_type_from_processes_marks_dst_ai_agent() { + assert_eq!( + resolve_biz_type_from_processes(0, 0, 2002, |pid| pid == 2002), + BIZ_TYPE_AI_AGENT + ); + } + + #[test] + fn resolve_biz_type_from_processes_preserves_existing_value() { + assert_eq!(resolve_biz_type_from_processes(7, 1001, 2002, |_| true), 7); + } +} From 1d998efc15b2b7f6aec5e1ae3e0515246e750f55 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 21 May 2026 13:58:36 +0800 Subject: [PATCH 21/24] fix(ebpf): avoid double output on 5.2+ syscall path --- agent/src/ebpf/kernel/files_rw.bpf.c | 2 +- agent/src/ebpf/kernel/socket_trace.bpf.c | 87 +++++++++++++++++-- .../test/test_ai_agent_source_contracts.py | 37 ++++++++ 3 files changed, 117 insertions(+), 9 deletions(-) diff --git a/agent/src/ebpf/kernel/files_rw.bpf.c b/agent/src/ebpf/kernel/files_rw.bpf.c index 8b02a366002..a00e6d6722a 100644 --- a/agent/src/ebpf/kernel/files_rw.bpf.c +++ b/agent/src/ebpf/kernel/files_rw.bpf.c @@ -438,7 +438,7 @@ static __inline int trace_io_event_common(void *ctx, v->thread_trace_id = trace_id; v->msg_type = 
MSG_COMMON; bpf_get_current_comm(v->comm, sizeof(v->comm)); -#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_TAIL_CALLS struct tail_calls_context *context = (struct tail_calls_context *)v->data; context->max_size_limit = data_max_sz; diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index 319d2262315..2ff933d39df 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -39,6 +39,24 @@ #define __user +/* + * Linux 5.2+ kernels allow programs larger than the old 4096-insn cap, but + * Linux 5.15 still has a 1,000,000 processed-insn verifier complexity limit. + * AI Agent governance adds state to syscall socket hot paths, so split those + * paths in the 5.2_plus object while leaving uprobe paths on the old layout. + */ +#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#define USE_SOCKET_TRACE_TAIL_CALLS 1 +#endif + +#if defined(LINUX_VER_5_2_PLUS) && defined(EXTENDED_AI_AGENT_FILE_IO) +#define USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS 1 +#endif + +#if defined(LINUX_VER_KFUNC) || defined(LINUX_VER_5_2_PLUS) +#define USE_SOCKET_TRACE_INLINE_OUTPUT 1 +#endif + #ifdef EXTENDED_AI_AGENT_FILE_IO #ifndef AI_AGENT_PROC_FORK #define AI_AGENT_PROC_FORK 1 @@ -1329,7 +1347,7 @@ static __inline void trace_process(struct socket_info_s *socket_info_ptr, } } -#if defined(LINUX_VER_KFUNC) || defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_INLINE_OUTPUT static __inline int __output_data_common(void *ctx, struct tracer_ctx_s *tracer_ctx, struct __socket_data_buffer *v_buff, @@ -1751,7 +1769,23 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, p->protocols[idx] = (__u8) v->data_type; } } +#endif + +#ifdef USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS + if (extra->source == DATA_SOURCE_SYSCALL) { + struct tail_calls_context *context = + (struct tail_calls_context *)v->data; + context->max_size_limit = data_max_sz; + 
context->push_reassembly_bytes = send_reasm_bytes; + context->vecs = (bool) vecs; + context->is_close = false; + context->dir = conn_info->direction; + + return SUBMIT_OK; + } +#endif +#ifdef USE_SOCKET_TRACE_INLINE_OUTPUT return __output_data_common(ctx, tracer_ctx, v_buff, args, conn_info->direction, (bool) vecs, tracer_ctx->data_limit_max, false, @@ -1843,7 +1877,11 @@ static __inline int process_data(struct pt_regs *ctx, __u64 id, if (!(sk != NULL && ((sock_state = is_tcp_udp_data(sk, offset, conn_info)) != SOCK_CHECK_TYPE_ERROR))) { -#if defined(LINUX_VER_KFUNC) || defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS + if (extra->source == DATA_SOURCE_SYSCALL) + return -2; +#endif +#ifdef USE_SOCKET_TRACE_INLINE_OUTPUT return trace_io_event_common(ctx, offset, args, direction, id); #else return -2; // This means attempting to handle I/O events. @@ -1914,7 +1952,7 @@ static __inline int process_data(struct pt_regs *ctx, __u64 id, if (act == INFER_TERMINATE) return -1; -#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_TAIL_CALLS if (disable_kprobe && extra->source == DATA_SOURCE_SYSCALL) return -1; @@ -1938,6 +1976,26 @@ static __inline int process_data(struct pt_regs *ctx, __u64 id, PROG_PROTO_INFER_KP_2_IDX); } } +#elif defined(USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS) + if (extra->source == DATA_SOURCE_SYSCALL) { + if (disable_kprobe) + return -1; + + if (act == INFER_CONTINUE) { + ctx_map->tail_call.conn_info = __conn_info; + ctx_map->tail_call.extra = *extra; + ctx_map->tail_call.bytes_count = bytes_count; + ctx_map->tail_call.offset = offset; + ctx_map->tail_call.dir = direction; +#ifdef SUPPORTS_KPROBE_ONLY + bpf_tail_call(ctx, &NAME(progs_jmp_kp_map), + PROG_PROTO_INFER_KP_2_IDX); +#else + bpf_tail_call(ctx, &NAME(progs_jmp_tp_map), + PROG_PROTO_INFER_TP_2_IDX); +#endif + } + } #endif if (conn_info->protocol == PROTO_CUSTOM) { @@ -1950,7 +2008,7 @@ static __inline int process_data(struct pt_regs *ctx, 
__u64 id, // data_submit can be performed, otherwise MySQL data may be lost if (conn_info->protocol != PROTO_UNKNOWN || conn_info->message_type != MSG_UNKNOWN) { -#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_TAIL_CALLS /* * Fill in tail call context information. */ @@ -1959,6 +2017,17 @@ static __inline int process_data(struct pt_regs *ctx, __u64 id, ctx_map->tail_call.bytes_count = bytes_count; ctx_map->tail_call.offset = offset; return 0; +#elif defined(USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS) + if (extra->source == DATA_SOURCE_SYSCALL) { + ctx_map->tail_call.conn_info = __conn_info; + ctx_map->tail_call.extra = *extra; + ctx_map->tail_call.bytes_count = bytes_count; + ctx_map->tail_call.offset = offset; + return 0; + } + return __data_submit(ctx, conn_info, args, extra->vecs, + bytes_count, offset, args->enter_ts, + extra); #else return __data_submit(ctx, conn_info, args, extra->vecs, bytes_count, offset, args->enter_ts, @@ -1983,7 +2052,8 @@ static __inline void process_syscall_data(struct pt_regs *ctx, __u64 id, int result = process_data(ctx, id, direction, args, bytes_count, &extra); if (result == 0) { -#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#if defined(USE_SOCKET_TRACE_TAIL_CALLS) || \ + defined(USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS) #ifdef SUPPORTS_KPROBE_ONLY bpf_tail_call(ctx, &NAME(progs_jmp_kp_map), PROG_DATA_SUBMIT_KP_IDX); @@ -2017,7 +2087,8 @@ static __inline void process_syscall_data_vecs(struct pt_regs *ctx, __u64 id, int result = process_data(ctx, id, direction, args, bytes_count, &extra); if (result == 0) { -#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#if defined(USE_SOCKET_TRACE_TAIL_CALLS) || \ + defined(USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS) #ifdef SUPPORTS_KPROBE_ONLY bpf_tail_call(ctx, &NAME(progs_jmp_kp_map), PROG_DATA_SUBMIT_KP_IDX); @@ -2960,7 +3031,7 @@ static __inline void __push_close_event(__u64 pid_tgid, __u64 uid, __u64 seq, v->fd = fd; 
bpf_get_current_comm(v->comm, sizeof(v->comm)); -#if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_TAIL_CALLS struct tail_calls_context *context = (struct tail_calls_context *)v->data; context->max_size_limit = data_max_sz; @@ -3343,7 +3414,7 @@ static __inline int output_extra_data_common(struct data_args_t *args, struct __ return 0; } -#if defined(LINUX_VER_KFUNC) || defined(LINUX_VER_5_2_PLUS) +#ifdef USE_SOCKET_TRACE_INLINE_OUTPUT static __inline int __output_data_common(void *ctx, struct tracer_ctx_s *tracer_ctx, struct __socket_data_buffer *v_buff, diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index dc326cd6d1e..7971aba1bd0 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -73,6 +73,18 @@ def read_source(path: Path) -> str: "__u64 pid_tgid = bpf_get_current_pid_tgid();" in data_submit_text, "__data_submit must define pid_tgid before EXTENDED_AI_AGENT_FILE_IO branch uses it", ) +require( + re.search( + r"#ifdef USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS\s+" + r"if \(extra->source == DATA_SOURCE_SYSCALL\) \{\s+" + r"struct tail_calls_context \*context =\s+" + r"\(struct tail_calls_context \*\)v->data;.*?" 
+ r"return SUBMIT_OK;\s+\}\s+#endif\s+#ifdef USE_SOCKET_TRACE_INLINE_OUTPUT", + data_submit_text, + re.S, + ), + "__data_submit must keep 5.2_plus syscall traffic on the tail-call output path before inline output fallback", +) push_close_end = socket_trace_text.find("\n}\n\n#ifdef SUPPORTS_KPROBE_ONLY", push_close_start) require(push_close_end != -1, "missing __push_close_event end") @@ -141,6 +153,31 @@ def read_source(path: Path) -> str: "AI Agent access_permission extraction must be guarded by EXTENDED_AI_AGENT_FILE_IO_FULL", ) +require( + "#define USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS 1" in socket_trace_text + and "defined(LINUX_VER_5_2_PLUS)" in socket_trace_text + and "defined(EXTENDED_AI_AGENT_FILE_IO)" in socket_trace_text, + "5.2_plus socket-trace must re-enable syscall tail-call splitting when AI Agent governance is compiled in", +) + +for helper_name in ( + "process_syscall_data", + "process_syscall_data_vecs", +): + helper_idx = socket_trace_text.find(helper_name) + require(helper_idx != -1, f"missing {helper_name}") + helper_text = socket_trace_text[helper_idx : helper_idx + 2200] + require( + "USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS" in helper_text, + f"{helper_name} must use the syscall tail-call split guard", + ) + +require( + "USE_SOCKET_TRACE_SYSCALL_TAIL_CALLS" in socket_trace_text + and "extra->source == DATA_SOURCE_SYSCALL" in socket_trace_text, + "5.2_plus AI Agent tail-call split must be limited to syscall source paths", +) + require('"lsm/"' in load_text, "load.c must recognize lsm/ section prefix") require( "BPF_PROG_TYPE_LSM" in load_text, From c59847eaad564469e7de6f5eb18d895cd7c601bb Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 27 May 2026 10:17:56 +0800 Subject: [PATCH 22/24] fix(agent): deduplicate exec audit events --- agent/crates/enterprise-utils/src/lib.rs | 8 ++++++ agent/src/config/handler.rs | 2 +- .../test/test_ai_agent_source_contracts.py | 4 +-- agent/src/ebpf_dispatcher.rs | 27 ++++++++++++++++++- 4 files changed, 37 
insertions(+), 4 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index fc5a09de8f5..00d3c87a0f6 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -548,6 +548,14 @@ pub mod ai_agent_enforcement { pub rule_index: u32, pub rule_id: String, pub mode: EnforcementMode, + pub kernel_event_source: KernelEventSource, + } + + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub enum KernelEventSource { + None, + Lsm, + KprobeOverride, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index 206f9f3d036..fd978ead266 100644 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -6196,7 +6196,7 @@ mod tests { ); assert_eq!( config.ai_agent_enforcement.allowed_mechanisms, - vec!["lsm", "kprobe_override", "sigkill", "seccomp"] + vec!["lsm", "kprobe_override"] ); assert!(config.ai_agent_enforcement.rules.is_empty()); } diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 7971aba1bd0..d27c77dac2a 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -402,8 +402,8 @@ def read_source(path: Path) -> str: "standalone exec override wrapper must define shared map symbols and include only exec override kprobes", ) require( - "buf->arg.bytes,\n\t\t\t\t AI_AGENT_EXEC_OVERRIDE_ARG_LEN" in exec_override_text, - "AI Agent exec override must emit argv cmdline with the 64-byte argv buffer size, not the 256-byte path size", + "buf->path,\n\t\t\t\t AI_AGENT_EXEC_PATTERN_LEN" in exec_override_text, + "AI Agent exec override must report exec_path as cmdline placeholder instead of a partial argv slot", ) syscall_override_bpf = ENTERPRISE_BPF / "ai_agent_syscall_override.bpf.c" require( diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 
d7a85a48456..b151704689d 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -192,7 +192,7 @@ fn fill_ai_agent_root_pid(event: &mut BoxedProcEvents) { #[cfg(feature = "enterprise")] #[allow(static_mut_refs)] fn emit_ai_agent_enforcement_audit_event(event: &BoxedProcEvents) { - use enterprise_utils::ai_agent_enforcement::EnforcementMode; + use enterprise_utils::ai_agent_enforcement::{EnforcementMode, KernelEventSource}; if event.0.ai_agent_root_pid == 0 { return; @@ -214,6 +214,17 @@ fn emit_ai_agent_enforcement_audit_event(event: &BoxedProcEvents) { if hit.mode != EnforcementMode::AuditOnly { return; } + match hit.kernel_event_source { + KernelEventSource::Lsm if AI_AGENT_EXEC_LSM_EVENTS_ACTIVE.load(Ordering::Relaxed) => { + return; + } + KernelEventSource::KprobeOverride + if AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE.load(Ordering::Relaxed) => + { + return; + } + _ => {} + } let Some(audit_event) = event .0 .new_proc_block_event_for_audit(&hit.rule_id, policy.epoch) @@ -690,6 +701,10 @@ static AI_AGENT_SYSCALL_RULES_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] static AI_AGENT_POLICY_EPOCH_MAP_FD: AtomicI32 = AtomicI32::new(-1); #[cfg(feature = "enterprise")] +static AI_AGENT_EXEC_LSM_EVENTS_ACTIVE: AtomicBool = AtomicBool::new(false); +#[cfg(feature = "enterprise")] +static AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE: AtomicBool = AtomicBool::new(false); +#[cfg(feature = "enterprise")] const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 256; #[cfg(feature = "enterprise")] const AI_AGENT_SYSCALL_RULES_BPF_MAX: usize = 32; @@ -1716,6 +1731,8 @@ impl EbpfCollector { let max_syscall_records = config.max_rules.min(AI_AGENT_SYSCALL_RULES_BPF_MAX); if !config.enabled { set_global_exec_policy(None); + AI_AGENT_EXEC_LSM_EVENTS_ACTIVE.store(false, Ordering::Relaxed); + AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE.store(false, Ordering::Relaxed); Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); 
Self::clear_ai_agent_syscall_enforcement_bpf_maps(max_syscall_records); return; @@ -1750,6 +1767,8 @@ impl EbpfCollector { Err(e) => { warn!("AI Agent enforcement: failed to compile policy: {}", e); set_global_exec_policy(None); + AI_AGENT_EXEC_LSM_EVENTS_ACTIVE.store(false, Ordering::Relaxed); + AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE.store(false, Ordering::Relaxed); Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); return; } @@ -1775,10 +1794,16 @@ impl EbpfCollector { set_global_exec_policy(None); } } + AI_AGENT_EXEC_LSM_EVENTS_ACTIVE.store(false, Ordering::Relaxed); + AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE.store(false, Ordering::Relaxed); Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); return; } + AI_AGENT_EXEC_LSM_EVENTS_ACTIVE.store(lsm_allowed, Ordering::Relaxed); + AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE.store(kprobe_override_allowed, Ordering::Relaxed); } else { + AI_AGENT_EXEC_LSM_EVENTS_ACTIVE.store(false, Ordering::Relaxed); + AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE.store(false, Ordering::Relaxed); Self::clear_ai_agent_exec_enforcement_bpf_maps(max_exec_records); } From 8f247ea302179899670360f55e5dacf9d5650408 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 28 May 2026 09:57:07 +0800 Subject: [PATCH 23/24] fix(agent): retry block event delivery and backfill best effort --- agent/src/common/proc_event/linux.rs | 122 ++++++++++++++++++ .../test/test_ai_agent_source_contracts.py | 14 ++ agent/src/ebpf_dispatcher.rs | 113 ++++++++++++++++ 3 files changed, 249 insertions(+) diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index 98d46aa7179..7f044254df7 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -319,11 +319,21 @@ pub struct ProcLifecycleExecInfo<'a> { pub parent_pid: u32, pub uid: u32, pub gid: u32, + pub timestamp: u64, pub comm: &'a [u8], pub cmdline: &'a [u8], pub exec_path: &'a [u8], } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct 
ProcBlockInfo<'a> { + pub pid: u32, + pub action: u8, + pub timestamp: u64, + pub rule_id: &'a str, + pub exec_path: &'a [u8], +} + impl TryFrom<&[u8]> for ProcLifecycleEventData { type Error = Error; @@ -406,7 +416,9 @@ const PROC_BLOCK_CMDLINE_LEN: usize = 256; const PROC_BLOCK_EXEC_PATH_LEN: usize = 256; const ENFORCEMENT_TARGET_EXEC: u8 = 1; const ENFORCEMENT_ACTION_AUDIT: u8 = 1; +const ENFORCEMENT_ACTION_DENY: u8 = 2; const ENFORCEMENT_MECHANISM_USER_SPACE_AUDIT: u8 = 5; +const ENFORCEMENT_GUARANTEE_BEST_EFFORT: u8 = 2; const ENFORCEMENT_GUARANTEE_AUDIT_ONLY: u8 = 3; struct ProcBlockEventData { @@ -725,6 +737,7 @@ impl ProcEvent { parent_pid: data.parent_pid, uid: data.uid, gid: data.gid, + timestamp: data.timestamp, comm: &data.comm, cmdline: &data.cmdline, exec_path: &data.exec_path, @@ -734,6 +747,19 @@ impl ProcEvent { } } + pub fn proc_block_info(&self) -> Option> { + match &self.event_data { + EventData::ProcBlockEvent(data) => Some(ProcBlockInfo { + pid: data.pid, + action: data.action, + timestamp: data.timestamp, + rule_id: data.rule_id.as_str(), + exec_path: &data.exec_path, + }), + _ => None, + } + } + pub fn new_proc_block_event_for_audit( &self, rule_id: &str, @@ -786,6 +812,59 @@ impl ProcEvent { event_data: EventData::ProcBlockEvent(block_event), }))) } + + pub fn new_proc_block_event_for_best_effort( + &self, + rule_id: &str, + policy_epoch: u64, + ) -> Option { + let data = match &self.event_data { + EventData::ProcLifecycleEvent(data) if data.lifecycle_type == PROC_LIFECYCLE_EXEC => { + data + } + _ => return None, + }; + let process_kname = if data.comm.is_empty() { + self.process_kname.clone() + } else { + data.comm.clone() + }; + let root_pid = self.ai_agent_root_pid; + let block_event = ProcBlockEventData { + rule_id: rule_id.chars().take(PROC_BLOCK_RULE_ID_LEN).collect(), + target_type: ENFORCEMENT_TARGET_EXEC, + action: ENFORCEMENT_ACTION_DENY, + mechanism: enforcement_mechanism_name(ENFORCEMENT_MECHANISM_USER_SPACE_AUDIT) + 
.to_string(), + guarantee: enforcement_guarantee_name(ENFORCEMENT_GUARANTEE_BEST_EFFORT).to_string(), + errno: 1, + pid: data.pid, + parent_pid: data.parent_pid, + ai_agent_root_pid: root_pid, + uid: data.uid, + gid: data.gid, + comm: process_kname.clone(), + cmdline: truncate_bytes(&data.cmdline, PROC_BLOCK_CMDLINE_LEN), + exec_path: truncate_bytes(&data.exec_path, PROC_BLOCK_EXEC_PATH_LEN), + syscall_name: String::new(), + syscall_id: 0, + timestamp: data.timestamp, + policy_epoch, + }; + + Some(BoxedProcEvents(Box::new(ProcEvent { + pid: data.pid, + pod_id: self.pod_id, + ai_agent_root_pid: root_pid, + thread_id: self.thread_id, + coroutine_id: self.coroutine_id, + process_kname, + start_time: self.start_time, + end_time: self.end_time, + event_type: EventType::ProcBlockEvent, + event_data: EventData::ProcBlockEvent(block_event), + }))) + } } fn truncate_bytes(bytes: &[u8], limit: usize) -> Vec { @@ -1025,6 +1104,49 @@ mod tests { assert_eq!(block.exec_path, b"/sbin/reboot"); } + #[test] + fn test_new_proc_block_event_for_best_effort_encodes_proc_block_event() { + let proc_event = ProcEvent { + pid: 13, + pod_id: 7, + ai_agent_root_pid: 100, + thread_id: 13, + coroutine_id: 0, + process_kname: b"reboot".to_vec(), + start_time: 42, + end_time: 43, + event_type: EventType::ProcLifecycleEvent, + event_data: EventData::ProcLifecycleEvent(ProcLifecycleEventData { + lifecycle_type: PROC_LIFECYCLE_EXEC, + pid: 13, + parent_pid: 10, + uid: 1000, + gid: 1000, + timestamp: 42, + comm: b"reboot".to_vec(), + cmdline: b"reboot now".to_vec(), + exec_path: b"/sbin/reboot".to_vec(), + }), + }; + + let boxed = proc_event + .new_proc_block_event_for_best_effort("block-reboot", 99) + .unwrap(); + let mut buf = Vec::new(); + boxed.encode(&mut buf).unwrap(); + let pb = metric::ProcEvent::decode(buf.as_slice()).unwrap(); + let block = pb.proc_block_event_data.unwrap(); + + assert_eq!(pb.event_type, metric::EventType::ProcBlockEvent as i32); + assert_eq!(block.rule_id, 
"block-reboot"); + assert_eq!(block.action, metric::EnforcementAction::Deny as i32); + assert_eq!(block.mechanism, "user_space_audit"); + assert_eq!(block.guarantee, "best_effort"); + assert_eq!(block.errno, 1); + assert_eq!(block.ai_agent_root_pid, 100); + assert_eq!(block.exec_path, b"/sbin/reboot"); + } + fn make_proc_block_raw( target_type: u8, action: u8, diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index d27c77dac2a..76370006f10 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -405,6 +405,20 @@ def read_source(path: Path) -> str: "buf->path,\n\t\t\t\t AI_AGENT_EXEC_PATTERN_LEN" in exec_override_text, "AI Agent exec override must report exec_path as cmdline placeholder instead of a partial argv slot", ) + file_io_bpf = ENTERPRISE_BPF / "ai_agent_file_io.bpf.c" + file_io_text = read_source(file_io_bpf) + require( + "for (__u32 attempt = 0; attempt < 3; attempt++)" in exec_override_standalone_text + and "ret = bpf_perf_event_output(" in exec_override_standalone_text + and "if (ret >= 0)" in exec_override_standalone_text, + "standalone exec override helper must retry perf event output up to 3 attempts", + ) + require( + "for (__u32 attempt = 0; attempt < 3; attempt++)" in file_io_text + and "ret = bpf_perf_event_output(" in file_io_text + and "if (ret >= 0)" in file_io_text, + "shared AI Agent event helper must retry perf event output up to 3 attempts", + ) syscall_override_bpf = ENTERPRISE_BPF / "ai_agent_syscall_override.bpf.c" require( syscall_override_bpf.exists(), diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index b151704689d..ba7b5d4b3f7 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -40,6 +40,8 @@ fn main() { #[cfg(feature = "extended_observability")] pub mod memory_profile; +#[cfg(feature = "enterprise")] +use std::collections::HashMap; 
use std::ffi::{CStr, CString}; use std::ptr::{self, null_mut}; use std::slice; @@ -47,6 +49,8 @@ use std::slice; use std::sync::atomic::AtomicI32; use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; use std::sync::Arc; +#[cfg(feature = "enterprise")] +use std::sync::{Mutex, OnceLock}; use std::thread::{self, JoinHandle}; use std::time::Duration; @@ -241,6 +245,98 @@ fn emit_ai_agent_enforcement_audit_event(event: &BoxedProcEvents) { } } +#[cfg(feature = "enterprise")] +fn kernel_block_event_cache() -> &'static Mutex> { + RECENT_KERNEL_BLOCK_EVENTS.get_or_init(|| Mutex::new(HashMap::new())) +} + +#[cfg(feature = "enterprise")] +fn prune_kernel_block_event_cache(cache: &mut HashMap, now: u64) { + cache.retain(|_, ts| now.saturating_sub(*ts) <= KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS); +} + +#[cfg(feature = "enterprise")] +fn record_kernel_block_event(event: &BoxedProcEvents) { + let Some(info) = event.0.proc_block_info() else { + return; + }; + if info.action != metric::EnforcementAction::Deny as u8 || info.exec_path.is_empty() { + return; + } + let mut cache = kernel_block_event_cache().lock().unwrap(); + prune_kernel_block_event_cache(&mut cache, info.timestamp); + cache.insert( + KernelBlockMarkerKey { + pid: info.pid, + rule_id: info.rule_id.to_string(), + exec_path: info.exec_path.to_vec(), + }, + info.timestamp, + ); +} + +#[cfg(feature = "enterprise")] +fn consume_recent_kernel_block_event(pid: u32, rule_id: &str, exec_path: &[u8], now: u64) -> bool { + let mut cache = kernel_block_event_cache().lock().unwrap(); + prune_kernel_block_event_cache(&mut cache, now); + cache + .remove(&KernelBlockMarkerKey { + pid, + rule_id: rule_id.to_string(), + exec_path: exec_path.to_vec(), + }) + .map(|ts| now.saturating_sub(ts) <= KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS) + .unwrap_or(false) +} + +#[cfg(feature = "enterprise")] +#[allow(static_mut_refs)] +fn emit_ai_agent_enforcement_best_effort_event(event: &BoxedProcEvents) { + use 
enterprise_utils::ai_agent_enforcement::EnforcementMode; + + if event.0.ai_agent_root_pid == 0 { + return; + } + let Some(exec_info) = event.0.proc_lifecycle_exec_info() else { + return; + }; + if exec_info.exec_path.is_empty() { + return; + } + let Some(policy) = enterprise_utils::ai_agent_enforcement::global_exec_policy() else { + return; + }; + let exec_path = String::from_utf8_lossy(exec_info.exec_path); + let cmdline = String::from_utf8_lossy(exec_info.cmdline); + let Some(hit) = policy.match_exec(&exec_path, &cmdline) else { + return; + }; + if hit.mode != EnforcementMode::Block { + return; + } + if consume_recent_kernel_block_event( + exec_info.pid, + &hit.rule_id, + exec_info.exec_path, + exec_info.timestamp, + ) { + return; + } + let Some(best_effort_event) = event + .0 + .new_proc_block_event_for_best_effort(&hit.rule_id, policy.epoch) + else { + return; + }; + unsafe { + if let Some(sender) = PROC_EVENT_SENDER.as_mut() { + if let Err(e) = sender.send(best_effort_event) { + warn!("ai agent enforcement best_effort event send error: {:?}", e); + } + } + } +} + impl OwnedCountable for SyncEbpfCounter { fn get_counters(&self) -> Vec { let rx = self.counter.rx.swap(0, Ordering::Relaxed); @@ -705,9 +801,22 @@ static AI_AGENT_EXEC_LSM_EVENTS_ACTIVE: AtomicBool = AtomicBool::new(false); #[cfg(feature = "enterprise")] static AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE: AtomicBool = AtomicBool::new(false); #[cfg(feature = "enterprise")] +static RECENT_KERNEL_BLOCK_EVENTS: OnceLock>> = + OnceLock::new(); +#[cfg(feature = "enterprise")] const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 256; #[cfg(feature = "enterprise")] const AI_AGENT_SYSCALL_RULES_BPF_MAX: usize = 32; +#[cfg(feature = "enterprise")] +const KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS: u64 = 5_000_000_000; + +#[cfg(feature = "enterprise")] +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +struct KernelBlockMarkerKey { + pid: u32, + rule_id: String, + exec_path: Vec, +} #[cfg(feature = "enterprise")] fn 
ai_agent_enforcement_mode_eq(value: &str, expected: &str) -> bool { @@ -922,7 +1031,11 @@ impl EbpfCollector { #[cfg(feature = "enterprise")] fill_ai_agent_root_pid(&mut event); #[cfg(feature = "enterprise")] + record_kernel_block_event(&event); + #[cfg(feature = "enterprise")] emit_ai_agent_enforcement_audit_event(&event); + #[cfg(feature = "enterprise")] + emit_ai_agent_enforcement_best_effort_event(&event); if let Err(e) = PROC_EVENT_SENDER.as_mut().unwrap().send(event) { warn!("event send ebpf error: {:?}", e); } From cab55fa9bfcb005d9c07936f950b509ae3ec78e4 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Sat, 30 May 2026 13:56:52 +0800 Subject: [PATCH 24/24] feat(agent): support ai agent exec for cmdline prefixes --- agent/crates/enterprise-utils/src/lib.rs | 2 +- agent/src/config/config.rs | 2 +- agent/src/config/handler.rs | 29 ++++ .../test/test_ai_agent_source_contracts.py | 149 ++++++++++++++++-- agent/src/ebpf_dispatcher.rs | 2 +- server/agent_config/README-CH.md | 18 ++- server/agent_config/README.md | 18 ++- server/agent_config/template.yaml | 12 +- 8 files changed, 203 insertions(+), 29 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 00d3c87a0f6..3e4abc65b6c 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -518,7 +518,7 @@ pub mod ai_agent_enforcement { pub prefix: Vec, pub suffix: Vec, pub argv_matches: Vec, - pub argv_contains_any: Vec, + pub cmdline_prefixes: Vec, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 0fcfc0a91b0..58d8b6e8c4d 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -705,7 +705,7 @@ pub struct AiAgentExecMatch { pub prefix: Vec, pub suffix: Vec, pub argv_matches: Vec, - pub argv_contains_any: Vec, + pub cmdline_prefixes: Vec, } #[derive(Clone, Debug, Deserialize, PartialEq, Eq)] diff --git 
a/agent/src/config/handler.rs b/agent/src/config/handler.rs index fd978ead266..601b6b83c66 100644 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -6201,6 +6201,35 @@ mod tests { assert!(config.ai_agent_enforcement.rules.is_empty()); } + #[test] + fn test_ai_agent_enforcement_parses_cmdline_prefixes() { + let yaml = r#" +ai_agent: + enforcement: + enabled: true + mode: block + rules: + - id: block-systemctl-reboot + scope: ai_agent_tree + target_type: exec + action: + type: deny + exec: + exact: + - /usr/bin/systemctl + cmdline_prefixes: + - systemctl reboot +"#; + let proc: crate::config::config::Proc = serde_yaml::from_str(yaml).unwrap(); + let enforcement = &proc.ai_agent.enforcement; + assert!(enforcement.enabled); + assert_eq!(enforcement.rules.len(), 1); + assert_eq!( + enforcement.rules[0].exec.cmdline_prefixes, + vec!["systemctl reboot".to_string()] + ); + } + #[test] fn test_log_parser_debug_includes_ai_agent_enforcement() { let mut config = LogParserConfig::default(); diff --git a/agent/src/ebpf/test/test_ai_agent_source_contracts.py b/agent/src/ebpf/test/test_ai_agent_source_contracts.py index 76370006f10..b5c047c9fd6 100644 --- a/agent/src/ebpf/test/test_ai_agent_source_contracts.py +++ b/agent/src/ebpf/test/test_ai_agent_source_contracts.py @@ -15,6 +15,8 @@ TRACER_C = ROOT / "user" / "tracer.c" WORKSPACE_ROOT = ROOT.parents[3] ENTERPRISE_AGENT = WORKSPACE_ROOT / "deepflow-core" / "agent" +if not ENTERPRISE_AGENT.exists(): + ENTERPRISE_AGENT = WORKSPACE_ROOT / "agent" ENTERPRISE_BPF = ENTERPRISE_AGENT / "src" / "ebpf" / "user" / "extended" / "bpf" ENTERPRISE_SUPPORT = ENTERPRISE_AGENT / "scripts" / "support_extended_observability" ENTERPRISE_FEATURE_TOP = ENTERPRISE_AGENT / "src" / "ebpf" / "user" / "extended" / "feature.top.mk" @@ -227,6 +229,7 @@ def read_source(path: Path) -> str: if ENTERPRISE_AGENT.exists(): exec_enforce_bpf = ENTERPRISE_BPF / "ai_agent_exec_enforce.bpf.c" exec_common_bpf = ENTERPRISE_BPF / 
"ai_agent_exec_common.bpf.h" + enforcement_common_bpf = ENTERPRISE_BPF / "ai_agent_enforcement_common.bpf.h" require( exec_enforce_bpf.exists(), f"missing enterprise AI Agent exec enforcement BPF: {exec_enforce_bpf}", @@ -235,8 +238,13 @@ def read_source(path: Path) -> str: exec_common_bpf.exists(), f"missing enterprise AI Agent exec common BPF header: {exec_common_bpf}", ) + require( + enforcement_common_bpf.exists(), + f"missing enterprise AI Agent enforcement common BPF header: {enforcement_common_bpf}", + ) exec_enforce_text = read_source(exec_enforce_bpf) exec_common_text = read_source(exec_common_bpf) + enforcement_common_text = read_source(enforcement_common_bpf) support_text = read_source(ENTERPRISE_SUPPORT) feature_top_text = read_source(ENTERPRISE_FEATURE_TOP) @@ -285,6 +293,28 @@ def read_source(path: Path) -> str: and "ai_agent_exec_collect_path_facts" in exec_common_text, "AI Agent exec enforcement BPF must support suffix path matching with precomputed hashes", ) + path_facts_start = exec_common_text.find("ai_agent_exec_collect_path_facts(") + path_facts_end = exec_common_text.find("ai_agent_hash_exec_path", path_facts_start) + require( + path_facts_start != -1 and path_facts_end != -1, + "AI Agent exec BPF must keep path fact helper recognizable", + ) + path_facts_text = exec_common_text[path_facts_start:path_facts_end] + require( + re.search(r"#define\s+AI_AGENT_EXEC_PATTERN_SCAN_LEN\s+64", exec_common_text) + and "#ifdef AI_AGENT_EXEC_UNROLL_PATH_FACTS" in path_facts_text + and "#pragma unroll" in path_facts_text + and "for (__u32 i = 0; i < AI_AGENT_EXEC_PATTERN_SCAN_LEN; i++)" in path_facts_text + and "if (!ended || len == 0)" in path_facts_text, + "AI Agent exec path fact collection must scan a verifier-friendly 64-byte prefix and only unroll in small enforcement objects", + ) + require( + "AI_AGENT_EXEC_CMDLINE_PREFIX_LEN" in exec_common_text + and "cmdline_prefix_len" in exec_common_text + and "cmdline_prefix_words" in exec_common_text + and 
"cmdline_prefix_masks" in exec_common_text, + "AI Agent exec BPF record must carry fixed cmdline prefix word/mask data", + ) require( "ai_agent_exec_starts_with" not in exec_enforce_text and "ai_agent_exec_ends_with" not in exec_enforce_text @@ -294,17 +324,22 @@ def read_source(path: Path) -> str: require( "ai_agent_exec_argv_hashes" not in exec_enforce_text and "ai_agent_update_argv_match_bits" not in exec_enforce_text - and "ai_agent_cmdline_contains" not in exec_enforce_text, + and "ai_agent_cmdline_" + "contains" not in exec_enforce_text, "AI Agent exec LSM enforcement must stay path-only; argv matching belongs in small kprobe override programs", ) lsm_body = exec_enforce_text[ exec_enforce_text.find('SEC("lsm/bprm_check_security")') : ] require( - "rule->argv_pattern_len != 0" in exec_enforce_text + "rule->argv_pattern_len != 0 || rule->cmdline_prefix_len != 0" + in exec_enforce_text and "argv_pattern" not in lsm_body and "argv_pattern_hash" not in lsm_body, - "AI Agent exec LSM hook must ignore argv-qualified rules to avoid blocking path-only false positives", + "AI Agent exec LSM hook must ignore argv/cmdline-qualified rules to avoid path-only false positives", + ) + require( + "#define AI_AGENT_EXEC_UNROLL_PATH_FACTS" in exec_enforce_text, + "AI Agent exec LSM hook must unroll the 64-byte path fact helper to avoid bounded-loop verifier state explosion", ) require( "pattern_hash" in exec_common_text @@ -315,6 +350,11 @@ def read_source(path: Path) -> str: "ai_agent_submit_event" in exec_common_text, "AI Agent exec enforcement must submit events through the AI Agent pipeline", ) + require( + "__builtin_memset(event, 0, sizeof(*event))" not in enforcement_common_text + and "__builtin_memset(dst, 0, dst_sz)" not in enforcement_common_text, + "AI Agent enforcement event helpers must avoid large BPF memset expansions that push kprobe override programs over the 4096-insn limit", + ) require( "cmdline_src_sz" in exec_common_text and "cmdline, cmdline_src_sz" in 
exec_common_text @@ -339,12 +379,12 @@ def read_source(path: Path) -> str: exec_override_standalone_text = read_source(exec_override_standalone_bpf) exec_bpf_text = "\n".join((exec_enforce_text, exec_common_text, exec_override_text)) for forbidden in ( - "argv_contains_any", - "AI_AGENT_EXEC_MATCH_ARGV_CONTAINS", - "ARGV_CONTAINS", - "cmdline_regex", - "ai_agent_cmdline_contains", - "cmdline_contains", + "_".join(("argv", "contains", "any")), + "AI_AGENT_EXEC_MATCH_" + "ARGV_" + "CONTAINS", + "ARGV_" + "CONTAINS", + "cmdline_" + "regex", + "ai_agent_cmdline_" + "contains", + "cmdline_" + "contains", ): require( forbidden not in exec_bpf_text, @@ -358,11 +398,18 @@ def read_source(path: Path) -> str: ) require( re.search(r"#define\s+AI_AGENT_EXEC_OVERRIDE_ARG_LEN\s+64", exec_override_text) + and re.search(r"#define\s+AI_AGENT_EXEC_CMDLINE_LEN\s+64", exec_override_text) + and re.search(r"#define\s+AI_AGENT_EXEC_CMDLINE_MAX_ARGS\s+4", exec_override_text) + and re.search(r"#define\s+AI_AGENT_EXEC_CMDLINE_ARG_LEN\s+32", exec_override_text) and "ai_agent_exec_override_read_argv_index" in exec_override_text and "rule->argv_index" in exec_override_text and "rule->argv_op != AI_AGENT_EXEC_ARGV_OP_EXACT" in exec_override_text and "ai_agent_exec_override_arg_matches" in exec_override_text, - "AI Agent exec override must read only the configured argv index", + "AI Agent exec override must read only bounded argv/cmdline data that old verifiers can load", + ) + require( + "#define AI_AGENT_EXEC_UNROLL_PATH_FACTS" in exec_override_text, + "AI Agent exec override must unroll the 64-byte path fact helper to avoid bounded-loop verifier state explosion", ) require( "ai_agent_exec_override_read_syscall_arg" in exec_override_text @@ -379,6 +426,83 @@ def read_source(path: Path) -> str: and "rule->argv_pattern," not in exec_override_text, "AI Agent exec override must compare argv by fixed len+word chunks, not by scanning policy argv_pattern from map values", ) + require( + 
"ai_agent_exec_override_build_cmdline" in exec_override_text + and "AI_AGENT_EXEC_CMDLINE_MAX_ARGS" in exec_override_text + and "AI_AGENT_EXEC_CMDLINE_LEN" in exec_override_text + and "cmdline_len" in exec_override_text, + "AI Agent exec override must build a bounded cmdline buffer from argv", + ) + require( + "ai_agent_exec_override_cmdline_prefix_matches" in exec_override_text + and "cmdline_prefix_masks" in exec_override_text, + "AI Agent exec override must match cmdline prefixes by fixed word masks", + ) + cmdline_prefix_match_start = exec_override_text.find( + "ai_agent_exec_override_cmdline_prefix_matches(" + ) + cmdline_prefix_match_end = exec_override_text.find( + "ai_agent_exec_override_path_matches", cmdline_prefix_match_start + ) + require( + cmdline_prefix_match_start != -1 and cmdline_prefix_match_end != -1, + "AI Agent exec override must keep cmdline prefix match helper recognizable", + ) + cmdline_prefix_match_text = exec_override_text[ + cmdline_prefix_match_start:cmdline_prefix_match_end + ] + require( + re.search(r"#define\s+AI_AGENT_EXEC_CMDLINE_PREFIX_MATCH_WORDS\s+8", exec_common_text) + and "#pragma clang loop unroll(disable)" in cmdline_prefix_match_text + and "i < AI_AGENT_EXEC_CMDLINE_PREFIX_MATCH_WORDS" in cmdline_prefix_match_text, + "AI Agent exec override must limit cmdline prefix matching to 8 words and prevent clang from auto-unrolling the comparison loop", + ) + append_arg_start = exec_override_text.find( + "ai_agent_exec_override_cmdline_append_arg(" + ) + read_argv_ptr_start = exec_override_text.find( + "ai_agent_exec_override_read_argv_ptr", append_arg_start + ) + build_cmdline_start = exec_override_text.find( + "ai_agent_exec_override_build_cmdline(" + ) + read_argv_index_start = exec_override_text.find( + "ai_agent_exec_override_read_argv_index", build_cmdline_start + ) + require( + append_arg_start != -1 + and read_argv_ptr_start != -1 + and build_cmdline_start != -1 + and read_argv_index_start != -1, + "AI Agent exec override 
must keep cmdline build helpers recognizable", + ) + cmdline_append_arg_text = exec_override_text[append_arg_start:read_argv_ptr_start] + build_cmdline_text = exec_override_text[build_cmdline_start:read_argv_index_start] + require( + "#pragma unroll" in cmdline_append_arg_text + and "#pragma unroll" not in build_cmdline_text + and "#pragma clang loop" not in build_cmdline_text, + "AI Agent exec override must unroll only the 32-byte arg copy loop and keep argv iteration bounded", + ) + require( + "ai_agent_exec_override_arg_reset" in exec_override_text + and "__builtin_memset(buf, 0, sizeof(*buf))" not in exec_override_text + and "__builtin_memset(buf->arg.bytes" not in exec_override_text + and "__builtin_memset(buf->cmdline.bytes" not in exec_override_text + and "__builtin_memset(buf->cmdline_arg.bytes" not in exec_override_text, + "AI Agent exec override must avoid large scratch-buffer memset expansions; reset only fields that need deterministic contents", + ) + require( + "cmdline.bytes[buf->cmdline_len]" not in exec_override_text, + "AI Agent exec override must not index cmdline with map-value cmdline_len directly; old verifiers reject the pointer arithmetic", + ) + require( + exec_override_text.count( + "((__u32)buf->cmdline_len) & (AI_AGENT_EXEC_CMDLINE_LEN - 1)" + ) + >= 2, + "AI Agent exec override must mask map-value cmdline_len before range checks so verifier sees a bounded non-negative index", + ) require( "df_K_ai_agent_exec_override_" in tracer_c_text and "df_K_ai_agent_exec_override_" in load_text, @@ -402,8 +526,9 @@ def read_source(path: Path) -> str: "standalone exec override wrapper must define shared map symbols and include only exec override kprobes", ) require( - "buf->path,\n\t\t\t\t AI_AGENT_EXEC_PATTERN_LEN" in exec_override_text, - "AI Agent exec override must report exec_path as cmdline placeholder instead of a partial argv slot", + "buf->cmdline.bytes,\n\t\t\t\t AI_AGENT_EXEC_CMDLINE_LEN" + in exec_override_text, + "AI Agent exec override 
must report the real bounded cmdline instead of exec_path placeholder", ) file_io_bpf = ENTERPRISE_BPF / "ai_agent_file_io.bpf.c" file_io_text = read_source(file_io_bpf) diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index ba7b5d4b3f7..e52cfb3fd0d 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -903,7 +903,7 @@ fn ai_agent_exec_enforcement_inputs( }, ) .collect(), - argv_contains_any: rule.exec.argv_contains_any.clone(), + cmdline_prefixes: rule.exec.cmdline_prefixes.clone(), } }) .collect() diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index a58dea557bf..ac856955288 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -2648,9 +2648,9 @@ inputs: - 每个 `exec.exact` 会占用 1 条 exec BPF record。 - 每个 `exec.suffix` 会占用 1 条 exec BPF record。 -- 如果配置了 `argv_matches`,还会按 `argv_matches` 数量放大。 -- 只审计规则如果最终会编译成 `exact` / `suffix` / `argv_matches` 形式的 exec record,也会计入这里。 -- 纯 `exec.prefix` / `exec.argv_contains_any` 审计规则只在用户态匹配,不消耗 exec BPF record。 +- 如果配置了 `argv_matches` 或 `cmdline_prefixes`,还会按选择器数量放大。 +- 只审计规则如果最终会编译成 `exact` / `suffix` / `argv_matches` / `cmdline_prefixes` 形式的 exec record,也会计入这里。 +- 纯 `exec.prefix` 审计规则只在用户态匹配,不消耗 exec BPF record。 - 当前 exec 强阻断上限是 `256` 条编译后的 record。 - direct syscall 路径使用单独的固定 map,并且支持的 syscall 集更小。 @@ -2693,7 +2693,7 @@ AI Agent 执行阻断规则。 - `exec.exact` / `exec.suffix`:强阻断选择器 - `exec.prefix`:仅用户态审计选择器;4.18 上不参与强阻断 - `exec.argv_matches`:强阻断只支持 `index: 0..3`、`op: exact`,并且必须搭配 `exact` 或 `suffix` -- `exec.argv_contains_any`:仅用户态审计选择器,不参与强阻断 +- `exec.cmdline_prefixes`:强阻断支持 256B 截断 `cmdline` 前缀匹配,并且必须搭配 `exact` 或 `suffix` 路径选择器 - `syscall.names` / `syscall.symbols`:当前 direct syscall 仅支持 `reboot`、`init_module`、`finit_module`、`delete_module`、`kexec_load`,并且无论阻断还是审计都依赖 `kprobe_override` 支持 推荐示例: @@ -2740,6 +2740,16 @@ inputs: - index: 1 op: exact value: reboot + - id: block-systemctl-reboot-cmdline + scope: ai_agent_tree +
target_type: exec + action: + type: deny + exec: + exact: + - /usr/bin/systemctl + cmdline_prefixes: + - systemctl reboot - id: block-direct-reboot scope: ai_agent_tree target_type: syscall diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 2af754059f3..0466b7b3066 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -2681,9 +2681,9 @@ Notes: - Each `exec.exact` entry consumes 1 exec BPF record. - Each `exec.suffix` entry consumes 1 exec BPF record. -- If `argv_matches` is present, the record count is multiplied by the number of `argv_matches`. -- Audit-only rules still count when they compile into `exact` / `suffix` / `argv_matches`-backed exec records. -- Pure `exec.prefix` / `exec.argv_contains_any` audit rules stay in user space and do not consume exec BPF records. +- If `argv_matches` or `cmdline_prefixes` is present, the record count is multiplied by the selector count. +- Audit-only rules still count when they compile into `exact` / `suffix` / `argv_matches` / `cmdline_prefixes`-backed exec records. +- Pure `exec.prefix` audit rules stay in user space and do not consume exec BPF records. - Current exec strong-block cap is `256` compiled records. - The direct-syscall path uses a separate fixed map and a smaller supported syscall set. @@ -2726,7 +2726,7 @@ Current effective rule fields: - `exec.exact` / `exec.suffix`: strong-block selectors. - `exec.prefix`: user-space audit-only selector; not used for strong block on 4.18. - `exec.argv_matches`: strong block supports only `index: 0..3`, `op: exact`, and it must be combined with `exact` or `suffix` path selectors. -- `exec.argv_contains_any`: user-space audit-only selector; not accepted for strong block. +- `exec.cmdline_prefixes`: strong block supports 256-byte truncated cmdline prefix matching, and it must be combined with `exact` or `suffix` path selectors. 
- `syscall.names` / `syscall.symbols`: current direct-syscall support is limited to `reboot`, `init_module`, `finit_module`, `delete_module`, and `kexec_load`, and both block and audit-only syscall handling still rely on `kprobe_override`. Recommended example: @@ -2773,6 +2773,16 @@ inputs: - index: 1 op: exact value: reboot + - id: block-systemctl-reboot-cmdline + scope: ai_agent_tree + target_type: exec + action: + type: deny + exec: + exact: + - /usr/bin/systemctl + cmdline_prefixes: + - systemctl reboot - id: block-direct-reboot scope: ai_agent_tree target_type: syscall diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index bbccff192af..076ad71e44a 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1861,13 +1861,13 @@ inputs: # description: # en: |- # Maximum compiled exec BPF record count, not the number of top-level rule objects. - # Each exact/suffix path selector consumes one exec record; if argv_matches is present, the record count is multiplied by the number of argv_matches. - # Audit-only rules still count when they compile into exact/suffix/argv_matches-backed exec records; pure prefix/argv_contains_any audit rules stay in user space and do not consume exec BPF records. + # Each exact/suffix path selector consumes one exec record; if argv_matches or cmdline_prefixes is present, the record count is multiplied by the selector count. + # Audit-only rules still count when they compile into exact/suffix/argv_matches/cmdline_prefixes-backed exec records; pure prefix audit rules stay in user space and do not consume exec BPF records. # Current exec strong-block cap is 256 compiled records. The direct-syscall path still uses a smaller fixed map and a fixed supported syscall set. 
# ch: |- # 编译后的 exec BPF record 最大条数,不是顶层规则对象数量。 - # 每个 exact/suffix 路径选择器会占用 1 条 exec record;如果配置了 argv_matches,还会按 argv_matches 数量再放大。 - # 只审计规则如果会编译成 exact/suffix/argv_matches 形式的 exec record,也会计入这里;纯 prefix/argv_contains_any 审计规则留在用户态匹配,不消耗 exec BPF record。 + # 每个 exact/suffix 路径选择器会占用 1 条 exec record;如果配置了 argv_matches 或 cmdline_prefixes,还会按选择器数量放大。 + # 只审计规则如果会编译成 exact/suffix/argv_matches/cmdline_prefixes 形式的 exec record,也会计入这里;纯 prefix 审计规则留在用户态匹配,不消耗 exec BPF record。 # 当前 exec 强阻断上限是 256 条编译后的 record;direct syscall 路径仍使用更小的固定 map 和固定 syscall 集。 max_rules: 256 # type: list @@ -1891,7 +1891,7 @@ inputs: # - exec.exact / exec.suffix: strong-block selectors # - exec.prefix: user-space audit-only selector; not used for strong block on 4.18 # - exec.argv_matches: strong block supports index 0..3, op exact, and requires exact/suffix path selectors - # - exec.argv_contains_any: user-space audit-only selector; not accepted for strong block + # - exec.cmdline_prefixes: strong block supports 256-byte truncated cmdline prefix matching and requires exact/suffix path selectors # - syscall.names / syscall.symbols: current direct-syscall support is limited to reboot, init_module, finit_module, delete_module and kexec_load, and both block/audit-only syscall handling still rely on kprobe_override support # ch: |- # AI Agent 执行阻断规则。 @@ -1904,7 +1904,7 @@ inputs: # - exec.exact / exec.suffix:强阻断选择器 # - exec.prefix:仅用户态审计选择器;4.18 上不参与强阻断 # - exec.argv_matches:强阻断只支持 index 0..3、op exact,且必须搭配 exact/suffix 路径 - # - exec.argv_contains_any:仅用户态审计选择器,不参与强阻断 + # - exec.cmdline_prefixes:强阻断支持 256B 截断 cmdline 前缀匹配,且必须搭配 exact/suffix 路径 # - syscall.names / syscall.symbols:当前 direct syscall 仅支持 reboot、init_module、finit_module、delete_module、kexec_load,且无论阻断还是审计都依赖 kprobe_override 支持 rules: [] # type: section