Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions capa/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,24 @@ def evaluate(self, features: FeatureSet, short_circuit=True):
capa.perf.counters["evaluate.feature.rule"] += 1
return self.statement.evaluate(features, short_circuit=short_circuit)

@staticmethod
def _lint_logic_edge_cases(statement: Union[Statement, Feature], is_root: bool = True) -> None:
"""
reject rule constructs known to be unsupported by the optimized matcher.
"""
if isinstance(statement, ceng.Not):
if is_root:
raise InvalidRule("top level statement may not be a `not` statement")
if isinstance(statement.child, ceng.Not):
raise InvalidRule("nested `not` statements are not supported")

if is_root and isinstance(statement, ceng.Range) and statement.min == 0 and statement.max == 0:
raise InvalidRule("top level statement may not be `count(...): 0`")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For better readability and to more clearly separate the concerns of checking root-level statements versus nested statements, you could refactor this logic. Grouping all is_root checks together at the beginning makes the function's logic easier to follow.

Suggested change
if isinstance(statement, ceng.Not):
if is_root:
raise InvalidRule("top level statement may not be a `not` statement")
if isinstance(statement.child, ceng.Not):
raise InvalidRule("nested `not` statements are not supported")
if is_root and isinstance(statement, ceng.Range) and statement.min == 0 and statement.max == 0:
raise InvalidRule("top level statement may not be `count(...): 0`")
if is_root:
if isinstance(statement, ceng.Not):
raise InvalidRule("top level statement may not be a `not` statement")
if isinstance(statement, ceng.Range) and statement.min == 0 and statement.max == 0:
raise InvalidRule("top level statement may not be `count(...): 0`")
if isinstance(statement, ceng.Not) and isinstance(statement.child, ceng.Not):
raise InvalidRule("nested `not` statements are not supported")


if isinstance(statement, Statement):
for child in statement.get_children():
Rule._lint_logic_edge_cases(child, is_root=False)

@classmethod
def from_dict(cls, d: dict[str, Any], definition: str) -> "Rule":
meta = d["rule"]["meta"]
Expand Down Expand Up @@ -1087,7 +1105,9 @@ def from_dict(cls, d: dict[str, Any], definition: str) -> "Rule":
if not isinstance(meta.get("mbc", []), list):
raise InvalidRule("MBC mapping must be a list")

return cls(name, scopes, build_statements(statements[0], scopes), meta, definition)
statement = build_statements(statements[0], scopes)
cls._lint_logic_edge_cases(statement)
return cls(name, scopes, statement, meta, definition)

@staticmethod
@lru_cache()
Expand Down Expand Up @@ -2109,13 +2129,10 @@ def match(
This wrapper around _match exists so that we can assert it matches precisely
the same as `capa.engine.match`, just faster.

This matcher does not handle some edge cases:
- top level NOT statements
- also top level counted features with zero occurances, like: `count(menmonic(mov)): 0`
- nested NOT statements (NOT: NOT: foo)

We should discourage/forbid these constructs from our rules and add lints for them.
TODO(williballenthin): add lints for logic edge cases
This matcher does not handle some edge cases, such as top level NOT statements,
top level counted features with zero occurrences (`count(mnemonic(mov)): 0`),
and nested NOT statements (`not: not: ...`).
These constructs are rejected during rule loading.

Args:
paranoid: when true, demonstrate that the naive matcher agrees with this
Expand Down
59 changes: 59 additions & 0 deletions tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -1685,3 +1685,62 @@ def test_circular_dependency():
]
with pytest.raises(capa.rules.InvalidRule):
list(capa.rules.get_rules_and_dependencies(rules, rules[0].name))


def test_invalid_top_level_not_statement():
with pytest.raises(capa.rules.InvalidRule, match="top level statement may not be a `not` statement"):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- not:
- number: 1
"""
)
)


def test_invalid_nested_not_statement():
with pytest.raises(capa.rules.InvalidRule, match="nested `not` statements are not supported"):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- and:
- mnemonic: mov
- not:
- not:
- number: 1
"""
)
)


def test_invalid_top_level_count_zero_statement():
with pytest.raises(capa.rules.InvalidRule, match="top level statement may not be `count\\(...\\): 0`"):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- count(number(100)): 0
"""
)
)
Loading