From 624d1fcc9a02e9e207ae4121b31a7f3f16f6f9f8 Mon Sep 17 00:00:00 2001 From: mikoto Date: Mon, 29 Apr 2024 20:59:02 +0200 Subject: [PATCH] draft: RoomEventFilter --- Cargo.toml | 2 ++ src/api/client_server/context.rs | 32 +++++++++++++++++ src/database/key_value/rooms/search.rs | 20 +++++++++-- src/database/mod.rs | 2 ++ src/utils/filter.rs | 48 ++++++++++++++++++++++++++ src/utils/mod.rs | 1 + 6 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 src/utils/filter.rs diff --git a/Cargo.toml b/Cargo.toml index 3a5c2647..65209a97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -157,6 +157,8 @@ tikv-jemallocator = { version = "0.5.0", features = [ sd-notify = { version = "0.4.1", optional = true } +url = "2.5.0" + [dependencies.rocksdb] features = ["lz4", "multi-threaded-cf", "zstd"] optional = true diff --git a/src/api/client_server/context.rs b/src/api/client_server/context.rs index 8e193e6b..011e2647 100644 --- a/src/api/client_server/context.rs +++ b/src/api/client_server/context.rs @@ -109,6 +109,38 @@ pub async fn get_context_route( let events_before: Vec<_> = events_before .into_iter() .map(|(_, pdu)| pdu.to_room_event()) + .filter(|event| { + if let Some(types) = &body.filter.types { + types + .iter() + .find(|t| { + t == &&event + .get_field::("type") + .expect("events should deserialize") + .expect("events should have a type") + }) + .is_some() + } else { + true + } + }) + .filter(|event| { + if !body.filter.not_types.is_empty() { + body + .filter + .not_types + .iter() + .find(|t| { + t == &&event + .get_field::("type") + .expect("events should deserialize") + .expect("events should have a type") + }) + .is_none() + } else { + true + } + }) .collect(); let events_after: Vec<_> = services() diff --git a/src/database/key_value/rooms/search.rs b/src/database/key_value/rooms/search.rs index ad573f06..387eb9da 100644 --- a/src/database/key_value/rooms/search.rs +++ b/src/database/key_value/rooms/search.rs @@ -1,10 +1,13 @@ +use std::str::FromStr; + use ruma::RoomId; +use url::Url; use crate::{database::KeyValueDatabase, service, services, utils, Result}; impl service::rooms::search::Data for KeyValueDatabase { fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> { - let mut batch = message_body + let mut token_batch = message_body .split_terminator(|c: char| !c.is_alphanumeric()) .filter(|s| !s.is_empty()) .filter(|word| word.len() <= 50) @@ -17,7 +20,20 @@ impl service::rooms::search::Data for KeyValueDatabase { (key, Vec::new()) }); - self.tokenids.insert_batch(&mut batch) + self.tokenids.insert_batch(&mut token_batch)?; + + let mut url_batch = message_body + .split_terminator(|c: char| !c.is_whitespace()) + .filter(|word| Url::from_str(word).is_ok()) + .map(|url| { + let mut key = shortroomid.to_be_bytes().to_vec(); + key.extend_from_slice(url.as_bytes()); + key.push(0xff); + key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here + (key, Vec::new()) + }); + + self.urltokenids.insert_batch(&mut url_batch) } fn search_pdus<'a>( diff --git a/src/database/mod.rs b/src/database/mod.rs index 8d1b1913..5397fe20 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -85,6 +85,7 @@ pub struct KeyValueDatabase { pub(super) threadid_userids: Arc, // ThreadId = RoomId + Count pub(super) tokenids: Arc, // TokenId = ShortRoomId + Token + PduIdCount + pub(super) urltokenids: Arc, // useful for `RoomEventFilter::contains_url` /// Participating servers in a room. pub(super) roomserverids: Arc, // RoomServerId = RoomId + ServerName @@ -312,6 +313,7 @@ impl KeyValueDatabase { threadid_userids: builder.open_tree("threadid_userids")?, tokenids: builder.open_tree("tokenids")?, + urltokenids: builder.open_tree("urltokenids")?, roomserverids: builder.open_tree("roomserverids")?, serverroomids: builder.open_tree("serverroomids")?, diff --git a/src/utils/filter.rs b/src/utils/filter.rs new file mode 100644 index 00000000..ae0e833c --- /dev/null +++ b/src/utils/filter.rs @@ -0,0 +1,48 @@ +use ruma::{api::client::filter::RoomEventFilter, events::AnyTimelineEvent, serde::Raw}; + +pub fn filter_room_events>>( + events: I, + filter: RoomEventFilter, +) { + events + .filter(|event| match &filter.types { + None => true, + Some(types) => types.iter().any(|t| { + t.as_str() + == event + .get_field::("type") + .expect("room events should deserialize") + .expect("room events should have a type") + }), + }) + .filter(|event| match &filter.not_types[..] { + [] => true, + not_types => not_types.iter().all(|t| { + t.as_str() + != event + .get_field::("type") + .expect("room events should deserialize") + .expect("room events should have a type") + }), + }) + .filter(|event| match &filter.rooms { + None => true, + Some(rooms) => rooms.iter().any(|r| { + r.as_str() + == event + .get_field::("room_id") + .expect("room events should deserialize") + .expect("room events should have a type") + }), + }) + .filter(|event| match &filter.rooms { + None => true, + Some(rooms) => rooms.iter().all(|r| { + r.as_str() + != event + .get_field::("room_id") + .expect("room events should deserialize") + .expect("room events should have a type") + }), + }); +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index d09a1033..0475c26a 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,4 +1,5 @@ pub mod error; +mod filter; use argon2::{Config, Variant}; use cmp::Ordering;