1
0
Fork 0
mirror of https://gitlab.com/famedly/conduit.git synced 2025-08-06 17:40:59 +00:00

fix: do not return redacted events from search

This commit is contained in:
Benjamin Lee 2024-06-11 16:33:55 +02:00 committed by Timo Kösters
parent 48c1f3bdba
commit 7b259272ce
No known key found for this signature in database
GPG key ID: 0B25E636FBA7E4CB
6 changed files with 92 additions and 25 deletions

View file

@ -2,24 +2,46 @@ use ruma::RoomId;
use crate::{database::KeyValueDatabase, service, services, utils, Result};
/// Breaks `body` into the lowercase tokens that key the search inverted index.
///
/// The same tokenization serves both sides of the index: message bodies when
/// they are indexed, and search queries when they are looked up.
///
/// A token is a maximal run of alphanumeric characters. Empty runs and runs
/// longer than 50 bytes are discarded; the remaining ones are lowercased.
fn tokenize(body: &str) -> impl Iterator<Item = String> + '_ {
    body.split(|ch: char| !ch.is_alphanumeric())
        // `len()` counts bytes, so the 50 limit applies to the UTF-8 encoding.
        .filter(|piece| (1..=50).contains(&piece.len()))
        .map(|piece| piece.to_lowercase())
}
impl service::rooms::search::Data for KeyValueDatabase {
/// Adds one search-index entry per token of `message_body` for the given PDU.
///
/// Each entry written to `tokenids` has the key
/// `shortroomid (big-endian bytes) ++ token ++ 0xff ++ pdu_id` and an empty
/// value. Tokens come from `tokenize`, so indexing, de-indexing and querying
/// all agree on how text is split.
///
/// Returns whatever `tokenids.insert_batch` returns.
fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
    // The iterator is lazy: keys are built as `insert_batch` pulls them.
    let mut batch = tokenize(message_body).map(|word| {
        let mut key = shortroomid.to_be_bytes().to_vec();
        key.extend_from_slice(word.as_bytes());
        key.push(0xff);
        key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here
        (key, Vec::new())
    });

    self.tokenids.insert_batch(&mut batch)
}
/// Removes the search-index entries of a PDU, one per token of `message_body`.
///
/// For every token produced by `tokenize`, the key
/// `shortroomid (big-endian bytes) ++ token ++ 0xFF ++ pdu_id` is rebuilt and
/// removed from `tokenids`. The first failing removal aborts the loop and its
/// error is returned; otherwise the result is `Ok(())`.
fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
    let room_prefix = shortroomid.to_be_bytes();

    for word in tokenize(message_body) {
        // TODO: currently we save the room id a second time here
        let token_key: Vec<u8> =
            [&room_prefix[..], word.as_bytes(), &[0xFF_u8][..], pdu_id].concat();
        self.tokenids.remove(&token_key)?;
    }

    Ok(())
}
fn search_pdus<'a>(
&'a self,
room_id: &RoomId,
@ -33,11 +55,7 @@ impl service::rooms::search::Data for KeyValueDatabase {
.to_be_bytes()
.to_vec();
let words: Vec<_> = search_string
.split_terminator(|c: char| !c.is_alphanumeric())
.filter(|s| !s.is_empty())
.map(str::to_lowercase)
.collect();
let words: Vec<_> = tokenize(search_string).collect();
let iterators = words.clone().into_iter().map(move |word| {
let mut prefix2 = prefix.clone();