1
0
Fork 0
mirror of https://gitlab.com/famedly/conduit.git synced 2025-07-22 17:18:35 +00:00

create indices for and filter by contains_url

This commit is contained in:
mikoto 2024-05-23 02:19:59 +02:00
parent 7438f2c4e0
commit a39eaf2080
8 changed files with 117 additions and 28 deletions

1
Cargo.lock generated
View file

@ -2441,6 +2441,7 @@ version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
dependencies = [ dependencies = [
"indexmap 2.2.5",
"itoa", "itoa",
"ryu", "ryu",
"serde", "serde",

View file

@ -14,6 +14,7 @@ channel = "1.78.0"
components = [ components = [
# For rust-analyzer # For rust-analyzer
"rust-src", "rust-src",
"rust-analyzer",
] ]
targets = [ targets = [
"aarch64-unknown-linux-musl", "aarch64-unknown-linux-musl",

View file

@ -1,7 +1,8 @@
use crate::{services, Error, Result, Ruma}; use crate::{services, utils::filter, Error, Result, Ruma};
use ruma::{ use ruma::{
api::client::{context::get_context, error::ErrorKind, filter::LazyLoadOptions}, api::client::{context::get_context, error::ErrorKind, filter::LazyLoadOptions},
events::StateEventType, events::StateEventType,
serde::Raw,
}; };
use std::collections::HashSet; use std::collections::HashSet;
use tracing::error; use tracing::error;
@ -78,7 +79,6 @@ pub async fn get_context_route(
.rooms .rooms
.timeline .timeline
.pdus_until(sender_user, &room_id, base_token)? .pdus_until(sender_user, &room_id, base_token)?
.take(limit / 2)
.filter_map(|r| r.ok()) // Remove buggy events .filter_map(|r| r.ok()) // Remove buggy events
.filter(|(_, pdu)| { .filter(|(_, pdu)| {
services() services()
@ -109,13 +109,33 @@ pub async fn get_context_route(
let events_before: Vec<_> = events_before let events_before: Vec<_> = events_before
.into_iter() .into_iter()
.map(|(_, pdu)| pdu.to_room_event()) .map(|(_, pdu)| pdu.to_room_event())
.filter(|v| {
filter::senders(
v,
body.filter.senders.as_ref(),
body.filter.not_senders.as_ref(),
)
})
.filter(|v| {
filter::types(
v,
body.filter.types.as_ref(),
body.filter.not_types.as_ref(),
)
})
.filter(|v| {
body.filter
.url_filter
.map(|f| filter::url(v, &room_id, f))
.unwrap_or(true)
})
.take(limit / 2)
.collect(); .collect();
let events_after: Vec<_> = services() let events_after: Vec<_> = services()
.rooms .rooms
.timeline .timeline
.pdus_after(sender_user, &room_id, base_token)? .pdus_after(sender_user, &room_id, base_token)?
.take(limit / 2)
.filter_map(|r| r.ok()) // Remove buggy events .filter_map(|r| r.ok()) // Remove buggy events
.filter(|(_, pdu)| { .filter(|(_, pdu)| {
services() services()
@ -165,6 +185,27 @@ pub async fn get_context_route(
let events_after: Vec<_> = events_after let events_after: Vec<_> = events_after
.into_iter() .into_iter()
.map(|(_, pdu)| pdu.to_room_event()) .map(|(_, pdu)| pdu.to_room_event())
.filter(|v| {
filter::senders(
v,
body.filter.senders.as_ref(),
body.filter.not_senders.as_ref(),
)
})
.filter(|v| {
filter::types(
v,
body.filter.types.as_ref(),
body.filter.not_types.as_ref(),
)
})
.filter(|v| {
body.filter
.url_filter
.map(|f| filter::url(v, &room_id, f))
.unwrap_or(true)
})
.take(limit / 2)
.collect(); .collect();
let mut state = Vec::new(); let mut state = Vec::new();
@ -175,24 +216,34 @@ pub async fn get_context_route(
.short .short
.get_statekey_from_short(shortstatekey)?; .get_statekey_from_short(shortstatekey)?;
if event_type != StateEventType::RoomMember { if !filter::types(
let pdu = match services().rooms.timeline.get_pdu(&id)? { &Raw::new(&serde_json::json!({"type": event_type})).expect("json can be serialized"),
Some(pdu) => pdu, body.filter.types.as_ref(),
body.filter.not_types.as_ref(),
) {
continue;
}
if event_type != StateEventType::RoomMember
|| (!lazy_load_enabled || lazy_loaded.contains(&state_key))
{
let event = match services().rooms.timeline.get_pdu(&id)? {
Some(pdu) => pdu.to_state_event(),
None => { None => {
error!("Pdu in state not found: {}", id); error!("Pdu in state not found: {}", id);
continue; continue;
} }
}; };
state.push(pdu.to_state_event());
} else if !lazy_load_enabled || lazy_loaded.contains(&state_key) { if !filter::senders(
let pdu = match services().rooms.timeline.get_pdu(&id)? { &event,
Some(pdu) => pdu, body.filter.senders.as_ref(),
None => { body.filter.not_senders.as_ref(),
error!("Pdu in state not found: {}", id); ) {
continue; continue;
} }
};
state.push(pdu.to_state_event()); state.push(event);
} }
} }

View file

@ -7,7 +7,7 @@ use crate::{
use ruma::{ use ruma::{
api::client::{ api::client::{
filter::{EventFormat, FilterDefinition, LazyLoadOptions, RoomFilter, UrlFilter}, filter::{EventFormat, FilterDefinition, LazyLoadOptions, RoomFilter},
sync::sync_events::{ sync::sync_events::{
self, self,
v3::{ v3::{
@ -1111,9 +1111,12 @@ async fn load_joined_room(
.expect("json can be serialized"), .expect("json can be serialized"),
_ => pdu.to_sync_room_event(), _ => pdu.to_sync_room_event(),
}) })
.filter(|v| match filter.timeline.url_filter.unwrap_or(true) { .filter(|v| {
UrlFilter::EventsWithUrl => todo!(), filter
UrlFilter::EventsWithoutUrl => todo!(), .timeline
.url_filter
.map(|f| filter::url(v, room_id, f))
.unwrap_or(true)
}) })
.filter(|v| { .filter(|v| {
filter::senders( filter::senders(

View file

@ -34,7 +34,7 @@ impl service::rooms::search::Data for KeyValueDatabase {
key.push(0xff); key.push(0xff);
key.extend_from_slice(pdu_id); key.extend_from_slice(pdu_id);
self.urltokenids.insert(&key, <&[u8]>::default())?; self.urltokenids.insert(&key, Default::default())?;
} }
Ok(()) Ok(())
@ -85,15 +85,17 @@ impl service::rooms::search::Data for KeyValueDatabase {
Ok(Some((Box::new(common_elements), words))) Ok(Some((Box::new(common_elements), words)))
} }
fn contains_url<'a>(&'a self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool> { fn contains_url(&self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool> {
let prefix = services() let prefix = services()
.rooms .rooms
.short .short
.get_shortroomid(room_id)? .get_shortroomid(room_id)?
.expect("room exists") .expect("room exists");
.to_be_bytes()
.to_vec();
todo!() let mut key = prefix.to_be_bytes().to_vec();
key.push(0xff);
key.extend_from_slice(pdu_id);
self.urltokenids.get(&key).map(|v| v.is_some())
} }
} }

View file

@ -11,5 +11,5 @@ pub trait Data: Send + Sync {
search_string: &str, search_string: &str,
) -> Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>>; ) -> Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>>;
fn contains_url<'a>(&'a self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool>; fn contains_url(&self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool>;
} }

View file

@ -23,4 +23,9 @@ impl Service {
) -> Result<Option<(impl Iterator<Item = Vec<u8>> + 'a, Vec<String>)>> { ) -> Result<Option<(impl Iterator<Item = Vec<u8>> + 'a, Vec<String>)>> {
self.db.search_pdus(room_id, search_string) self.db.search_pdus(room_id, search_string)
} }
/// Reports whether the search data holds a URL entry for the given PDU in the given room.
///
/// This is a direct pass-through to the database layer's `contains_url`; the result and any
/// error are returned unchanged.
#[tracing::instrument(skip(self))]
pub fn contains_url(&self, room_id: &RoomId, pdu_id: &[u8]) -> Result<bool> {
self.db.contains_url(room_id, pdu_id)
}
} }

View file

@ -1,6 +1,10 @@
use ruma::{serde::Raw, OwnedRoomId, OwnedUserId}; use ruma::{
api::client::filter::UrlFilter, serde::Raw, OwnedEventId, OwnedRoomId, OwnedUserId, RoomId,
};
use serde::Deserialize; use serde::Deserialize;
use crate::services;
fn inclusion<T, F: for<'a> Deserialize<'a> + PartialEq>( fn inclusion<T, F: for<'a> Deserialize<'a> + PartialEq>(
event: &Raw<T>, event: &Raw<T>,
field: &str, field: &str,
@ -37,3 +41,25 @@ pub fn senders<T>(
pub fn types<T>(event: &Raw<T>, types: Option<&Vec<String>>, not_types: &[String]) -> bool { pub fn types<T>(event: &Raw<T>, types: Option<&Vec<String>>, not_types: &[String]) -> bool {
inclusion(event, "event_type", types, not_types) inclusion(event, "event_type", types, not_types)
} }
/// Decides whether `event` passes a `contains_url` filter.
///
/// The event's `event_id` is resolved to a PDU id through the timeline service, and that PDU id
/// is then checked with the room search service's `contains_url`. If the PDU id cannot be
/// resolved, or the lookup returns an error, the event is treated as not containing a URL.
///
/// Returns `true` when the event should be kept: for `UrlFilter::EventsWithUrl` that means a URL
/// entry was found, for `UrlFilter::EventsWithoutUrl` that none was found.
///
/// # Panics
///
/// Panics if `event` has no `event_id` field or the field cannot be deserialized.
pub fn url<T>(event: &Raw<T>, room_id: &RoomId, filter: UrlFilter) -> bool {
    let event_id = event
        .get_field::<OwnedEventId>("event_id")
        .expect("event_id can be deserialized")
        .expect("event should have event_id");

    let has_url = match services().rooms.timeline.get_pdu_id(&event_id) {
        Ok(Some(pdu_id)) => services()
            .rooms
            .search
            .contains_url(room_id, &pdu_id)
            .unwrap_or(false),
        // Unresolvable PDU id (missing or lookup error): handled as "no URL".
        _ => false,
    };

    match filter {
        UrlFilter::EventsWithUrl => has_url,
        UrlFilter::EventsWithoutUrl => !has_url,
    }
}