From ada1251a5236d5b3d958421d062d96b9335493a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20K=C3=B6sters?= Date: Sun, 10 Jul 2022 16:28:43 +0200 Subject: [PATCH] refactor: work on search --- src/database/key_value/room.rs | 1 - .../key_value/{room => rooms}/alias.rs | 0 .../key_value/{room => rooms}/directory.rs | 0 .../{room => rooms}/edus/presence.rs | 0 .../{room => rooms}/edus/read_receipt.rs | 0 .../key_value/{room => rooms}/edus/typing.rs | 0 .../key_value/{room => rooms}/lazy_load.rs | 0 .../key_value/{room => rooms}/metadata.rs | 0 src/database/key_value/{room => rooms}/mod.rs | 0 .../key_value/{room => rooms}/outlier.rs | 0 .../key_value/{room => rooms}/pdu_metadata.rs | 0 src/database/key_value/rooms/search.rs | 66 +++++++++++++++++++ .../key_value/{room => rooms}/state.rs | 0 src/service/rooms/search/data.rs | 9 +++ src/service/rooms/search/mod.rs | 53 ++++----------- src/service/rooms/timeline/mod.rs | 15 +---- 16 files changed, 87 insertions(+), 57 deletions(-) delete mode 100644 src/database/key_value/room.rs rename src/database/key_value/{room => rooms}/alias.rs (100%) rename src/database/key_value/{room => rooms}/directory.rs (100%) rename src/database/key_value/{room => rooms}/edus/presence.rs (100%) rename src/database/key_value/{room => rooms}/edus/read_receipt.rs (100%) rename src/database/key_value/{room => rooms}/edus/typing.rs (100%) rename src/database/key_value/{room => rooms}/lazy_load.rs (100%) rename src/database/key_value/{room => rooms}/metadata.rs (100%) rename src/database/key_value/{room => rooms}/mod.rs (100%) rename src/database/key_value/{room => rooms}/outlier.rs (100%) rename src/database/key_value/{room => rooms}/pdu_metadata.rs (100%) create mode 100644 src/database/key_value/rooms/search.rs rename src/database/key_value/{room => rooms}/state.rs (100%) create mode 100644 src/service/rooms/search/data.rs diff --git a/src/database/key_value/room.rs b/src/database/key_value/room.rs deleted file mode 100644 index 8bd6648e..00000000 --- a/src/database/key_value/room.rs +++ /dev/null @@ -1 +0,0 @@ -asdf diff --git a/src/database/key_value/room/alias.rs b/src/database/key_value/rooms/alias.rs similarity index 100% rename from src/database/key_value/room/alias.rs rename to src/database/key_value/rooms/alias.rs diff --git a/src/database/key_value/room/directory.rs b/src/database/key_value/rooms/directory.rs similarity index 100% rename from src/database/key_value/room/directory.rs rename to src/database/key_value/rooms/directory.rs diff --git a/src/database/key_value/room/edus/presence.rs b/src/database/key_value/rooms/edus/presence.rs similarity index 100% rename from src/database/key_value/room/edus/presence.rs rename to src/database/key_value/rooms/edus/presence.rs diff --git a/src/database/key_value/room/edus/read_receipt.rs b/src/database/key_value/rooms/edus/read_receipt.rs similarity index 100% rename from src/database/key_value/room/edus/read_receipt.rs rename to src/database/key_value/rooms/edus/read_receipt.rs diff --git a/src/database/key_value/room/edus/typing.rs b/src/database/key_value/rooms/edus/typing.rs similarity index 100% rename from src/database/key_value/room/edus/typing.rs rename to src/database/key_value/rooms/edus/typing.rs diff --git a/src/database/key_value/room/lazy_load.rs b/src/database/key_value/rooms/lazy_load.rs similarity index 100% rename from src/database/key_value/room/lazy_load.rs rename to src/database/key_value/rooms/lazy_load.rs diff --git a/src/database/key_value/room/metadata.rs b/src/database/key_value/rooms/metadata.rs similarity index 100% rename from src/database/key_value/room/metadata.rs rename to src/database/key_value/rooms/metadata.rs diff --git a/src/database/key_value/room/mod.rs b/src/database/key_value/rooms/mod.rs similarity index 100% rename from src/database/key_value/room/mod.rs rename to src/database/key_value/rooms/mod.rs diff --git a/src/database/key_value/room/outlier.rs b/src/database/key_value/rooms/outlier.rs similarity index 100% rename from src/database/key_value/room/outlier.rs rename to src/database/key_value/rooms/outlier.rs diff --git a/src/database/key_value/room/pdu_metadata.rs b/src/database/key_value/rooms/pdu_metadata.rs similarity index 100% rename from src/database/key_value/room/pdu_metadata.rs rename to src/database/key_value/rooms/pdu_metadata.rs diff --git a/src/database/key_value/rooms/search.rs b/src/database/key_value/rooms/search.rs new file mode 100644 index 00000000..1ffffe56 --- /dev/null +++ b/src/database/key_value/rooms/search.rs @@ -0,0 +1,66 @@ +impl service::room::search::Data for KeyValueDatabase { + + fn index_pdu<'a>(&self, room_id: &RoomId, pdu_id: u64, message_body: String) -> Result<()> { + let mut batch = body + .split_terminator(|c: char| !c.is_alphanumeric()) + .filter(|s| !s.is_empty()) + .filter(|word| word.len() <= 50) + .map(str::to_lowercase) + .map(|word| { + let mut key = shortroomid.to_be_bytes().to_vec(); + key.extend_from_slice(word.as_bytes()); + key.push(0xff); + key.extend_from_slice(&pdu_id); + (key, Vec::new()) + }); + + self.tokenids.insert_batch(&mut batch)?; + } + + fn search_pdus<'a>( + &'a self, + room_id: &RoomId, + search_string: &str, + ) -> Result> + 'a, Vec)>> { + let prefix = self + .get_shortroomid(room_id)? + .expect("room exists") + .to_be_bytes() + .to_vec(); + let prefix_clone = prefix.clone(); + + let words: Vec<_> = search_string + .split_terminator(|c: char| !c.is_alphanumeric()) + .filter(|s| !s.is_empty()) + .map(str::to_lowercase) + .collect(); + + let iterators = words.clone().into_iter().map(move |word| { + let mut prefix2 = prefix.clone(); + prefix2.extend_from_slice(word.as_bytes()); + prefix2.push(0xff); + + let mut last_possible_id = prefix2.clone(); + last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes()); + + self.tokenids + .iter_from(&last_possible_id, true) // Newest pdus first + .take_while(move |(k, _)| k.starts_with(&prefix2)) + .map(|(key, _)| key[key.len() - size_of::()..].to_vec()) + }); + + Ok(utils::common_elements(iterators, |a, b| { + // We compare b with a because we reversed the iterator earlier + b.cmp(a) + }) + .map(|iter| { + ( + iter.map(move |id| { + let mut pduid = prefix_clone.clone(); + pduid.extend_from_slice(&id); + pduid + }), + words, + ) + })) + } diff --git a/src/database/key_value/room/state.rs b/src/database/key_value/rooms/state.rs similarity index 100% rename from src/database/key_value/room/state.rs rename to src/database/key_value/rooms/state.rs diff --git a/src/service/rooms/search/data.rs b/src/service/rooms/search/data.rs new file mode 100644 index 00000000..1601e0de --- /dev/null +++ b/src/service/rooms/search/data.rs @@ -0,0 +1,9 @@ +pub trait Data { + pub fn index_pdu<'a>(&self, room_id: &RoomId, pdu_id: u64, message_body: String) -> Result<()>; + + pub fn search_pdus<'a>( + &'a self, + room_id: &RoomId, + search_string: &str, + ) -> Result> + 'a, Vec)>>; +} diff --git a/src/service/rooms/search/mod.rs b/src/service/rooms/search/mod.rs index ce055058..5478273c 100644 --- a/src/service/rooms/search/mod.rs +++ b/src/service/rooms/search/mod.rs @@ -1,50 +1,19 @@ +mod data; +pub use data::Data; +use crate::service::*; + +pub struct Service { + db: D, +} + +impl Service<_> { #[tracing::instrument(skip(self))] pub fn search_pdus<'a>( &'a self, room_id: &RoomId, search_string: &str, ) -> Result> + 'a, Vec)>> { - let prefix = self - .get_shortroomid(room_id)? - .expect("room exists") - .to_be_bytes() - .to_vec(); - let prefix_clone = prefix.clone(); - - let words: Vec<_> = search_string - .split_terminator(|c: char| !c.is_alphanumeric()) - .filter(|s| !s.is_empty()) - .map(str::to_lowercase) - .collect(); - - let iterators = words.clone().into_iter().map(move |word| { - let mut prefix2 = prefix.clone(); - prefix2.extend_from_slice(word.as_bytes()); - prefix2.push(0xff); - - let mut last_possible_id = prefix2.clone(); - last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes()); - - self.tokenids - .iter_from(&last_possible_id, true) // Newest pdus first - .take_while(move |(k, _)| k.starts_with(&prefix2)) - .map(|(key, _)| key[key.len() - size_of::()..].to_vec()) - }); - - Ok(utils::common_elements(iterators, |a, b| { - // We compare b with a because we reversed the iterator earlier - b.cmp(a) - }) - .map(|iter| { - ( - iter.map(move |id| { - let mut pduid = prefix_clone.clone(); - pduid.extend_from_slice(&id); - pduid - }), - words, - ) - })) + self.db.search_pdus(room_id, search_string) } - +} diff --git a/src/service/rooms/timeline/mod.rs b/src/service/rooms/timeline/mod.rs index 6299b16c..5b423d2d 100644 --- a/src/service/rooms/timeline/mod.rs +++ b/src/service/rooms/timeline/mod.rs @@ -439,20 +439,7 @@ .map_err(|_| Error::bad_database("Invalid content in pdu."))?; if let Some(body) = content.body { - let mut batch = body - .split_terminator(|c: char| !c.is_alphanumeric()) - .filter(|s| !s.is_empty()) - .filter(|word| word.len() <= 50) - .map(str::to_lowercase) - .map(|word| { - let mut key = shortroomid.to_be_bytes().to_vec(); - key.extend_from_slice(word.as_bytes()); - key.push(0xff); - key.extend_from_slice(&pdu_id); - (key, Vec::new()) - }); - - self.tokenids.insert_batch(&mut batch)?; + DB.rooms.search.index_pdu(room_id, pdu_id, body)?; let admin_room = self.id_from_alias( <&RoomAliasId>::try_from(