From 7438f2c4e0d8e517fb30c6912d2f106a0843aef1 Mon Sep 17 00:00:00 2001 From: mikoto Date: Thu, 23 May 2024 00:37:49 +0200 Subject: [PATCH] contains_url draft --- src/api/client_server/sync.rs | 6 +++- src/database/key_value/rooms/search.rs | 40 ++++++++++++++++++-------- src/service/rooms/search/data.rs | 2 ++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/api/client_server/sync.rs b/src/api/client_server/sync.rs index c340b9e6..8c95a3ef 100644 --- a/src/api/client_server/sync.rs +++ b/src/api/client_server/sync.rs @@ -7,7 +7,7 @@ use crate::{ use ruma::{ api::client::{ - filter::{EventFormat, FilterDefinition, LazyLoadOptions, RoomFilter}, + filter::{EventFormat, FilterDefinition, LazyLoadOptions, RoomFilter, UrlFilter}, sync::sync_events::{ self, v3::{ @@ -1111,6 +1111,10 @@ async fn load_joined_room( .expect("json can be serialized"), _ => pdu.to_sync_room_event(), }) + .filter(|v| match filter.timeline.url_filter.unwrap_or(true) { + UrlFilter::EventsWithUrl => todo!(), + UrlFilter::EventsWithoutUrl => todo!(), + }) .filter(|v| { filter::senders( v, diff --git a/src/database/key_value/rooms/search.rs b/src/database/key_value/rooms/search.rs index 387eb9da..c2bdfa8c 100644 --- a/src/database/key_value/rooms/search.rs +++ b/src/database/key_value/rooms/search.rs @@ -7,10 +7,17 @@ use crate::{database::KeyValueDatabase, service, services, utils, Result}; impl service::rooms::search::Data for KeyValueDatabase { fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> { + let mut contains_url = false; + let mut token_batch = message_body .split_terminator(|c: char| !c.is_alphanumeric()) .filter(|s| !s.is_empty()) - .filter(|word| word.len() <= 50) + .filter(|word| { + contains_url = + contains_url || (word.starts_with("http") && Url::from_str(word).is_ok()); + + word.len() <= 50 + }) .map(str::to_lowercase) .map(|word| { let mut key = shortroomid.to_be_bytes().to_vec(); @@ -22,18 +29,15 @@ impl 
service::rooms::search::Data for KeyValueDatabase { self.tokenids.insert_batch(&mut token_batch)?; - let mut url_batch = message_body - .split_terminator(|c: char| !c.is_whitespace()) - .filter(|word| Url::from_str(word).is_ok()) - .map(|url| { - let mut key = shortroomid.to_be_bytes().to_vec(); - key.extend_from_slice(url.as_bytes()); - key.push(0xff); - key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here - (key, Vec::new()) - }); + if contains_url { + let mut key = shortroomid.to_be_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(pdu_id); - self.urltokenids.insert_batch(&mut url_batch) + self.urltokenids.insert(&key, <&[u8]>::default())?; + } + + Ok(()) } fn search_pdus<'a>( @@ -80,4 +84,16 @@ impl service::rooms::search::Data for KeyValueDatabase { Ok(Some((Box::new(common_elements), words))) } + + fn contains_url<'a>(&'a self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool> { + let prefix = services() + .rooms + .short + .get_shortroomid(room_id)? + .expect("room exists") + .to_be_bytes() + .to_vec(); + + todo!() + } } diff --git a/src/service/rooms/search/data.rs b/src/service/rooms/search/data.rs index 7ea7e3d1..7d046fa5 100644 --- a/src/service/rooms/search/data.rs +++ b/src/service/rooms/search/data.rs @@ -10,4 +10,6 @@ pub trait Data: Send + Sync { room_id: &RoomId, search_string: &str, ) -> Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>>; + + fn contains_url<'a>(&'a self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool>; }