-
Notifications
You must be signed in to change notification settings - Fork 524
Introduce spam_checker_spammy internal event metadata.
#19453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e1bf75a
8c54c12
1facd0d
be62d4c
96059cb
8835bc8
be8c05b
e274e23
630ff19
e225d6d
71137f6
709631d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Introduce `spam_checker_spammy` internal event metadata. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,6 +55,7 @@ enum EventInternalMetadataData { | |
| SoftFailed(bool), | ||
| ProactivelySend(bool), | ||
| PolicyServerSpammy(bool), | ||
| SpamCheckerSpammy(bool), | ||
|
Comment on lines
57
to
+58
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be an enum which would allow future adaptions like this? It makes sense that these could be both true so that may not apply in any case. I'm guessing we have some early-returns if either things marks the event as spammy though?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An enum isn't a bad idea actually, but I can't think of a pleasant way of doing that migration, which leads me to think we might be better off keeping the 2 bools but also addressing the 'early-returns' point you make.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Early-return removed at be8c05b so we evaluate both attributes
Comment on lines
57
to
+58
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is "spam checker" only for the module API?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, or really there is also the pre-module-API 'spam checker API' but it got superseded by the module API, I think we have a compatibility shim. |
||
| Redacted(bool), | ||
| TxnId(Box<str>), | ||
| TokenId(i64), | ||
|
|
@@ -104,6 +105,13 @@ impl EventInternalMetadataData { | |
| .to_owned() | ||
| .into_any(), | ||
| ), | ||
| EventInternalMetadataData::SpamCheckerSpammy(o) => ( | ||
| pyo3::intern!(py, "spam_checker_spammy"), | ||
| o.into_pyobject(py) | ||
| .unwrap_infallible() | ||
| .to_owned() | ||
| .into_any(), | ||
| ), | ||
| EventInternalMetadataData::Redacted(o) => ( | ||
| pyo3::intern!(py, "redacted"), | ||
| o.into_pyobject(py) | ||
|
|
@@ -168,6 +176,11 @@ impl EventInternalMetadataData { | |
| .extract() | ||
| .with_context(|| format!("'{key_str}' has invalid type"))?, | ||
| ), | ||
| "spam_checker_spammy" => EventInternalMetadataData::SpamCheckerSpammy( | ||
| value | ||
| .extract() | ||
| .with_context(|| format!("'{key_str}' has invalid type"))?, | ||
| ), | ||
| "redacted" => EventInternalMetadataData::Redacted( | ||
| value | ||
| .extract() | ||
|
|
@@ -451,6 +464,17 @@ impl EventInternalMetadata { | |
| set_property!(self, PolicyServerSpammy, obj); | ||
| } | ||
|
|
||
| #[getter] | ||
| fn get_spam_checker_spammy(&self) -> PyResult<bool> { | ||
| Ok(get_property_opt!(self, SpamCheckerSpammy) | ||
| .copied() | ||
| .unwrap_or(false)) | ||
| } | ||
| #[setter] | ||
| fn set_spam_checker_spammy(&mut self, obj: bool) { | ||
| set_property!(self, SpamCheckerSpammy, obj); | ||
| } | ||
|
|
||
| #[getter] | ||
| fn get_redacted(&self) -> PyResult<bool> { | ||
| let bool = get_property!(self, Redacted)?; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -177,7 +177,9 @@ async def _check_sigs_and_hash( | |
| # Note: we don't redact the event so admins can inspect the event after the | ||
| # fact. Other processes may redact the event, but that won't be applied to | ||
| # the database copy of the event until the server's config requires it. | ||
| return pdu | ||
| # | ||
| # We also *don't* return early here as we would still like to evaluate | ||
| # `spam_checker_spammy`, for completeness. | ||
|
|
||
| spam_check = await self._spam_checker_module_callbacks.check_event_for_spam(pdu) | ||
|
|
||
|
|
@@ -194,6 +196,8 @@ async def _check_sigs_and_hash( | |
| # using the event in prev_events). | ||
| redacted_event = prune_event(pdu) | ||
| redacted_event.internal_metadata.soft_failed = True | ||
| # Mark this as spam so we don't re-evaluate soft-failure status. | ||
| redacted_event.internal_metadata.spam_checker_spammy = True | ||
|
Comment on lines
+199
to
+200
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on how this was used in #18968, perhaps Especially as the reasoning there is "Skipping the insertion of these types of 'invalid' events is useful for performance reasons because they would fill up the table yet we wouldn't show them to clients anyway." and
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, this comment below clarifies why we distinguish this:
|
||
| return redacted_event | ||
|
|
||
| return pdu | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,22 +12,33 @@ | |
| # <https://www.gnu.org/licenses/agpl-3.0.html>. | ||
| # | ||
| # | ||
| from http import HTTPStatus | ||
| from typing import Literal | ||
|
|
||
| from twisted.internet.testing import MemoryReactor | ||
|
|
||
| from synapse.api.constants import EventContentFields, EventTypes | ||
| from synapse.api.constants import ( | ||
| EventContentFields, | ||
| EventTypes, | ||
| Membership, | ||
| ) | ||
| from synapse.api.room_versions import RoomVersions | ||
| from synapse.config.server import DEFAULT_ROOM_VERSION | ||
| from synapse.events import make_event_from_dict | ||
| from synapse.module_api import EventBase | ||
| from synapse.rest import admin, login, room, room_upgrade_rest_servlet | ||
| from synapse.server import HomeServer | ||
| from synapse.types import Codes, JsonDict | ||
| from synapse.util.clock import Clock | ||
|
|
||
| from tests import unittest | ||
| from tests.server import FakeChannel | ||
| from tests.unittest import HomeserverTestCase | ||
|
|
||
|
|
||
| class SpamCheckerTestCase(HomeserverTestCase): | ||
| """Tests for the spam checker module API.""" | ||
|
|
||
| servlets = [ | ||
| room.register_servlets, | ||
| admin.register_servlets, | ||
|
|
@@ -284,3 +295,178 @@ async def user_may_send_state_event( | |
|
|
||
| self.assertEqual(channel.code, 403) | ||
| self.assertEqual(channel.json_body["errcode"], Codes.FORBIDDEN) | ||
|
|
||
|
|
||
| class FederatedEventSpamCheckMetadataTestCase(unittest.FederatingHomeserverTestCase): | ||
| servlets = [ | ||
| admin.register_servlets, | ||
| login.register_servlets, | ||
| room.register_servlets, | ||
| ] | ||
|
|
||
| def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: | ||
| super().prepare(reactor, clock, hs) | ||
| self._module_api = hs.get_module_api() | ||
| self._store = hs.get_datastores().main | ||
| self._storage_controllers = hs.get_storage_controllers() | ||
| self._federation_event_handler = hs.get_federation_event_handler() | ||
| self._federation_server = hs.get_federation_server() | ||
| self._state_handler = hs.get_state_handler() | ||
| self._persistence_controller = hs.get_storage_controllers().persistence | ||
|
|
||
| # Create a room | ||
| user1_id = self.register_user("user1", "pass") | ||
| user1_tok = self.login(user1_id, "pass") | ||
| self.room_id = self.helper.create_room_as( | ||
| user1_id, | ||
| tok=user1_tok, | ||
| is_public=True, | ||
| room_version=RoomVersions.V10.identifier, | ||
| ) | ||
|
|
||
| # Prepare a join for the 'remote' user | ||
| state_map = self.get_success( | ||
| self._storage_controllers.state.get_current_state(self.room_id) | ||
| ) | ||
| forward_extremity_event_ids = self.get_success( | ||
| self.hs.get_datastores().main.get_latest_event_ids_in_room(self.room_id) | ||
| ) | ||
| self.remote_user_id = f"@remoteuser:{self.OTHER_SERVER_NAME}" | ||
| self.remote_user_join_event = make_event_from_dict( | ||
| self.add_hashes_and_signatures_from_other_server( | ||
| { | ||
| "room_id": self.room_id, | ||
| "sender": self.remote_user_id, | ||
| "state_key": self.remote_user_id, | ||
| "depth": 1000, | ||
| "origin_server_ts": 1, | ||
| "type": EventTypes.Member, | ||
| "content": {"membership": Membership.JOIN}, | ||
| "auth_events": [ | ||
| state_map[(EventTypes.Create, "")].event_id, | ||
| state_map[(EventTypes.JoinRules, "")].event_id, | ||
| ], | ||
| "prev_events": list(forward_extremity_event_ids), | ||
| } | ||
| ), | ||
| room_version=RoomVersions.V10, | ||
| ) | ||
|
|
||
| # Send the join | ||
| self.get_success( | ||
| self._federation_event_handler.on_receive_pdu( | ||
| self.OTHER_SERVER_NAME, self.remote_user_join_event | ||
| ) | ||
| ) | ||
|
|
||
| # Check the join made it to the 'local' view of the room | ||
| self.helper.get_event( | ||
| room_id=self.room_id, | ||
| event_id=self.remote_user_join_event.event_id, | ||
| tok=user1_tok, | ||
| expect_code=HTTPStatus.OK, | ||
| ) | ||
|
reivilibre marked this conversation as resolved.
|
||
|
|
||
| def test_federated_events_with_spam_checker_metadata(self) -> None: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to add a in-repo Complement test for this sort of thing. Only hiccup would be setting up/configuring up the spam checker module but we could always configure one and have some specific constant that triggers this behavior
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure how much it fits; I feel the trial test is adequate and is nicer to develop on.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To make it more clear, I think the trial test is sufficient 👍 It was more an ideal. One benefit would be avoiding the room version details (like this problem). We could also avoid the internal details of manually puppeting federation here. In terms of checking, we would probably check via |
||
| """ | ||
| Simulates receiving spammy and non-spammy events over federation, | ||
| then checks their `spam_checker_spammy` flag is set properly. | ||
| """ | ||
|
|
||
| async def check_event_for_spam(event: EventBase) -> Literal["NOT_SPAM"] | Codes: | ||
| if event.type == EventTypes.Message: | ||
| if "ham" not in event.content["body"]: | ||
| return Codes.FORBIDDEN | ||
| return "NOT_SPAM" | ||
|
|
||
| # Register a spam checker callback that only allows messages with 'ham' | ||
| self._module_api.register_spam_checker_callbacks( | ||
| check_event_for_spam=check_event_for_spam | ||
| ) | ||
|
|
||
| # Prepare a spammy and a non-spammy event. | ||
| forward_extremity_event_ids = self.get_success( | ||
| self._store.get_latest_event_ids_in_room(self.room_id) | ||
| ) | ||
| state_map = self.get_success( | ||
| self._storage_controllers.state.get_current_state(self.room_id) | ||
| ) | ||
| spammy_event = make_event_from_dict( | ||
| self.add_hashes_and_signatures_from_other_server( | ||
| { | ||
| "room_id": self.room_id, | ||
| "sender": self.remote_user_id, | ||
| "depth": 2000, | ||
| "origin_server_ts": 2, | ||
| "type": EventTypes.Message, | ||
| "content": {"body": "this is spam", "msgtype": "m.text"}, | ||
| "auth_events": [ | ||
| state_map[(EventTypes.Create, "")].event_id, | ||
| state_map[(EventTypes.JoinRules, "")].event_id, | ||
| state_map[(EventTypes.Member, self.remote_user_id)].event_id, | ||
| ], | ||
| "prev_events": list(forward_extremity_event_ids), | ||
| } | ||
| ), | ||
| room_version=RoomVersions.V10, | ||
| ) | ||
| non_spammy_event = make_event_from_dict( | ||
| self.add_hashes_and_signatures_from_other_server( | ||
| { | ||
| "room_id": self.room_id, | ||
| "sender": self.remote_user_id, | ||
| "depth": 2000, | ||
| "origin_server_ts": 2, | ||
| "type": EventTypes.Message, | ||
| "content": {"body": "delicious ham", "msgtype": "m.text"}, | ||
| "auth_events": [ | ||
| state_map[(EventTypes.Create, "")].event_id, | ||
| state_map[(EventTypes.JoinRules, "")].event_id, | ||
| state_map[(EventTypes.Member, self.remote_user_id)].event_id, | ||
| ], | ||
| "prev_events": list(forward_extremity_event_ids), | ||
| } | ||
| ), | ||
| room_version=RoomVersions.V10, | ||
| ) | ||
|
|
||
| # Receive these events over federation | ||
| # We need to let the federation server have them because it will | ||
| # invoke `_check_sigs_and_hash` which invokes the spam checker. | ||
| self.get_success( | ||
| self._federation_server._handle_received_pdu( | ||
| self.OTHER_SERVER_NAME, spammy_event | ||
| ) | ||
| ) | ||
| self.get_success( | ||
| self._federation_server._handle_received_pdu( | ||
| self.OTHER_SERVER_NAME, non_spammy_event | ||
| ) | ||
| ) | ||
|
|
||
| # Retrieve the events from the database | ||
| retrieved_spammy_event = self.get_success( | ||
| self._store.get_event(spammy_event.event_id, allow_rejected=True) | ||
| ) | ||
| retrieved_non_spammy_event = self.get_success( | ||
| self._store.get_event(non_spammy_event.event_id, allow_rejected=True) | ||
| ) | ||
|
|
||
| # Assert the spammy flags (and soft-failed flags, for good measure) are set properly | ||
| self.assertTrue( | ||
| retrieved_spammy_event.internal_metadata.spam_checker_spammy, | ||
| "Spammy inbound event should be marked as spam_checker_spammy!", | ||
| ) | ||
| self.assertTrue( | ||
| retrieved_spammy_event.internal_metadata.is_soft_failed(), | ||
| "Spammy inbound event should be soft-failed.", | ||
| ) | ||
|
|
||
| self.assertFalse( | ||
| retrieved_non_spammy_event.internal_metadata.spam_checker_spammy, | ||
| "Non-spammy inbound event should not be marked as spam_checker_spammy!", | ||
| ) | ||
| self.assertFalse( | ||
| retrieved_non_spammy_event.internal_metadata.is_soft_failed(), | ||
| "Non-spammy inbound event should not be soft-failed.", | ||
| ) | ||
Uh oh!
There was an error while loading. Please reload this page.