From bb4cade9fd8bb7860bd9ce4d64f01a30ba9c4de1 Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Fri, 28 Jul 2023 17:53:15 +0200 Subject: [PATCH 01/15] Preview URLs Closes: #14 --- Cargo.toml | 3 + conduit-example.toml | 3 + debian/postinst | 3 + src/api/client_server/media.rs | 237 +++++++++++++++++++++++++++++++- src/config/mod.rs | 3 + src/database/key_value/media.rs | 109 +++++++++++++++ src/database/mod.rs | 4 + src/main.rs | 1 + src/service/globals/mod.rs | 4 + src/service/media/data.rs | 20 +++ src/service/media/mod.rs | 67 +++++++++ src/service/mod.rs | 9 +- 12 files changed, 460 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c74773a0..766cf49c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,6 +147,8 @@ tikv-jemallocator = { version = "0.5.0", features = [ sd-notify = { version = "0.4.1", optional = true } +webpage = { version = "1.6", default-features = false, optional = true } + # Used for matrix spec type definitions and helpers [dependencies.ruma] features = [ @@ -186,6 +188,7 @@ conduit_bin = ["axum"] jemalloc = ["tikv-jemallocator"] sqlite = ["parking_lot", "rusqlite", "tokio/signal"] systemd = ["sd-notify"] +url_preview = ["webpage"] [[bin]] name = "conduit" diff --git a/conduit-example.toml b/conduit-example.toml index 74cbb074..969c0074 100644 --- a/conduit-example.toml +++ b/conduit-example.toml @@ -47,6 +47,9 @@ registration_token = "" allow_check_for_updates = true allow_federation = true +# Allows clients to request a URL preview +allow_url_preview = false + # Enable the display name lightning bolt on registration. enable_lightning_bolt = true diff --git a/debian/postinst b/debian/postinst index 6361af5a..8738ffe8 100644 --- a/debian/postinst +++ b/debian/postinst @@ -84,6 +84,9 @@ allow_check_for_updates = true # Enable the display name lightning bolt on registration. 
enable_lightning_bolt = true +# Allows clients to request a URL preview +allow_url_preview = false + # Servers listed here will be used to gather public keys of other servers. # Generally, copying this exactly should be enough. (Currently, Conduit doesn't # support batched key requests, so this list should only contain Synapse diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 5cd2b2f9..75492893 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -2,13 +2,22 @@ use std::time::Duration; use crate::{service::media::FileMeta, services, utils, Error, Result, Ruma}; use ruma::api::client::{ - error::ErrorKind, + error::{ErrorKind, RetryAfter}, media::{ create_content, get_content, get_content_as_filename, get_content_thumbnail, - get_media_config, + get_media_config, get_media_preview }, }; +#[cfg(feature = "url_preview")] +use { + crate::service::media::UrlPreviewData, + webpage::HTML, + std::{io::Cursor, net::IpAddr, sync::Arc, time::Duration}, + tokio::sync::Notify, + image::io::Reader as ImgReader, +}; + const MXC_LENGTH: usize = 32; /// # `GET /_matrix/media/r0/config` @@ -22,6 +31,230 @@ pub async fn get_media_config_route( }) } +#[cfg(feature = "url_preview")] +async fn download_image( + client: &reqwest::Client, + url: &str, +) -> Result { + let image = client.get(url).send().await?.bytes().await?; + let mxc = format!( + "mxc://{}/{}", + services().globals.server_name(), + utils::random_string(MXC_LENGTH) + ); + services().media + .create(mxc.clone(), None, None, &image) + .await?; + + let (width, height) = match ImgReader::new(Cursor::new(&image)).with_guessed_format() { + Err(_) => (None, None), + Ok(reader) => match reader.into_dimensions() { + Err(_) => (None, None), + Ok((width, height)) => (Some(width), Some(height)), + }, + }; + + Ok(UrlPreviewData { + image: Some(mxc), + image_size: Some(image.len()), + image_width: width, + image_height: height, + ..Default::default() + }) +} + 
+#[cfg(feature = "url_preview")] +async fn download_html( + client: &reqwest::Client, + url: &str, +) -> Result { + let max_download_size = 300_000; + + let mut response = client.get(url).send().await?; + + let mut bytes: Vec = Vec::new(); + while let Some(chunk) = response.chunk().await? { + bytes.extend_from_slice(&chunk); + if bytes.len() > max_download_size { + break; + } + } + let body = String::from_utf8_lossy(&bytes); + let html = match HTML::from_string(body.to_string(), Some(url.to_owned())) { + Ok(html) => html, + Err(_) => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Failed to parse HTML", + )) + } + }; + + let mut data = match html.opengraph.images.first() { + None => UrlPreviewData::default(), + Some(obj) => download_image(client, &obj.url).await?, + }; + + let props = html.opengraph.properties; + /* use OpenGraph title/description, but fall back to HTML if not available */ + data.title = props.get("title").cloned().or(html.title); + data.description = props.get("description").cloned().or(html.description); + Ok(data) +} + +#[cfg(feature = "url_preview")] +fn url_request_allowed(addr: &IpAddr) -> bool { + // could be implemented with reqwest when it supports IP filtering: + // https://github.com/seanmonstar/reqwest/issues/1515 + + // TODO: simplify to .is_global() when it has been stabilized + match addr { + IpAddr::V4(ip4) => { + !(ip4.is_private() + || ip4.is_loopback() + || ip4.is_link_local() + || ip4.is_multicast() + || ip4.is_broadcast() + || ip4.is_documentation() + || ip4.is_unspecified()) + } + IpAddr::V6(ip6) => !(ip6.is_loopback() || ip6.is_multicast() || ip6.is_unspecified()), + } +} + +#[cfg(feature = "url_preview")] +async fn request_url_preview(url: String) -> Result { + let client = services().globals.default_client(); + let response = client.head(&url).send().await?; + + if !response + .remote_addr() + .map_or(false, |a| url_request_allowed(&a.ip())) + { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Requesting 
from this address forbidden", + )); + } + + let content_type = match response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|x| x.to_str().ok()) + { + Some(ct) => ct, + None => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Unknown Content-Type", + )) + } + }; + let data = match content_type { + html if html.starts_with("text/html") => download_html(&client, &url).await?, + img if img.starts_with("image/") => download_image(&client, &url).await?, + _ => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Unsupported Content-Type", + )) + } + }; + + services().media.set_url_preview(&url, &data).await?; + + Ok(data) +} + +#[cfg(feature = "url_preview")] +async fn get_url_preview(url: String) -> Result { + if let Some(preview) = services().media.get_url_preview(&url).await { + return Ok(preview); + } + + let notif_opt = services() + .media + .url_preview_requests + .read() + .unwrap() + .get(&url) + .cloned(); + + match notif_opt { + None => { + let notifier = Arc::new(Notify::new()); + { + services().media + .url_preview_requests + .write() + .unwrap() + .insert(url.clone(), notifier.clone()); + } + + let data = request_url_preview(url.clone()).await; + + notifier.notify_waiters(); + + { + services().media.url_preview_requests.write().unwrap().remove(&url); + } + + data + } + Some(notifier) => { + // wait until being notified that request is finished + let notifier = notifier.clone(); + let notifier = notifier.notified(); + notifier.await; + + services().media + .get_url_preview(&url) + .await + .ok_or(Error::BadRequest( + ErrorKind::Unknown, + "No Preview available", + )) + } + } +} + +/// # `GET /_matrix/media/r0/preview_url` +/// +/// Returns URL preview. 
+#[cfg(feature = "url_preview")] +pub async fn get_media_preview_route( + body: Ruma, +) -> Result { + if !services().globals.allow_url_preview() { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Previewing URL not allowed", + )); + } + + if let Ok(preview) = get_url_preview(body.url.clone()).await { + let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); + return Ok(get_media_preview::v3::Response::from_raw_value(res)); + } + + Err(Error::BadRequest( + ErrorKind::LimitExceeded { + retry_after: Some(RetryAfter::Delay(Duration::from_secs(5))), + }, + "Retry later", + )) +} + +#[cfg(not(feature = "url_preview"))] +pub async fn get_media_preview_route( + _body: Ruma, +) -> Result { + Err(Error::BadRequest( + ErrorKind::Forbidden, + "URL preview not implemented", + )) +} + /// # `POST /_matrix/media/r0/upload` /// /// Permanently save media in the server. diff --git a/src/config/mod.rs b/src/config/mod.rs index 378ab929..d661a5de 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -53,6 +53,8 @@ pub struct Config { pub allow_encryption: bool, #[serde(default = "false_fn")] pub allow_federation: bool, + #[serde(default = "false_fn")] + pub allow_url_preview: bool, #[serde(default = "true_fn")] pub allow_room_creation: bool, #[serde(default = "true_fn")] @@ -184,6 +186,7 @@ impl fmt::Display for Config { ), ("Allow encryption", &self.allow_encryption.to_string()), ("Allow federation", &self.allow_federation.to_string()), + ("Allow URL preview", &self.allow_url_preview.to_string()), ("Allow room creation", &self.allow_room_creation.to_string()), ( "JWT secret", diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 6abe5ba5..2ad5e3fc 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -2,6 +2,9 @@ use ruma::api::client::error::ErrorKind; use crate::{database::KeyValueDatabase, service, utils, Error, Result}; +#[cfg(feature = "url_preview")] +use 
crate::service::media::UrlPreviewData; + impl service::media::Data for KeyValueDatabase { fn create_file_metadata( &self, @@ -79,4 +82,110 @@ impl service::media::Data for KeyValueDatabase { }; Ok((content_disposition, content_type, key)) } + + #[cfg(feature = "url_preview")] + fn remove_url_preview(&self, url: &str) -> Result<()> { + self.url_previews.remove(url.as_bytes()) + } + + #[cfg(feature = "url_preview")] + fn set_url_preview(&self, url: &str, data: &UrlPreviewData, timestamp: std::time::Duration) -> Result<()> { + let mut value = Vec::<u8>::new(); + value.extend_from_slice(&timestamp.as_secs().to_be_bytes()); + value.push(0xff); + value.extend_from_slice( + data.title + .as_ref() + .map(|t| t.as_bytes()) + .unwrap_or_default(), + ); + value.push(0xff); + value.extend_from_slice( + data.description + .as_ref() + .map(|d| d.as_bytes()) + .unwrap_or_default(), + ); + value.push(0xff); + value.extend_from_slice( + data.image + .as_ref() + .map(|i| i.as_bytes()) + .unwrap_or_default(), + ); + value.push(0xff); + value.extend_from_slice(&data.image_size.unwrap_or(0).to_be_bytes()); + value.push(0xff); + value.extend_from_slice(&data.image_width.unwrap_or(0).to_be_bytes()); + value.push(0xff); + value.extend_from_slice(&data.image_height.unwrap_or(0).to_be_bytes()); + + self.url_previews.insert(url.as_bytes(), &value) + } + + #[cfg(feature = "url_preview")] + fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> { + let values = self.url_previews.get(url.as_bytes()).ok()??; + + let mut values = values.split(|&b| b == 0xff); + + let _ts = match values + .next() + .map(|b| u64::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + let title = match values + .next() + .and_then(|b| String::from_utf8(b.to_vec()).ok()) + { + Some(s) if s.is_empty() => None, + x => x, + }; + let description = match values + .next() + .and_then(|b| String::from_utf8(b.to_vec()).ok()) + { + Some(s) if s.is_empty() => None, + x => x, + }; + let image = match 
values + .next() + .and_then(|b| String::from_utf8(b.to_vec()).ok()) + { + Some(s) if s.is_empty() => None, + x => x, + }; + let image_size = match values + .next() + .map(|b| usize::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + let image_width = match values + .next() + .map(|b| u32::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + let image_height = match values + .next() + .map(|b| u32::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + + Some(UrlPreviewData { + title, + description, + image, + image_size, + image_width, + image_height, + }) + } } diff --git a/src/database/mod.rs b/src/database/mod.rs index 5171d4bb..40446e9c 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -146,6 +146,8 @@ pub struct KeyValueDatabase { //pub media: media::Media, pub(super) mediaid_file: Arc, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType + #[cfg(feature = "url_preview")] + pub(super) url_previews: Arc, //pub key_backups: key_backups::KeyBackups, pub(super) backupid_algorithm: Arc, // BackupId = UserId + Version(Count) pub(super) backupid_etag: Arc, // BackupId = UserId + Version(Count) @@ -362,6 +364,8 @@ impl KeyValueDatabase { roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?, roomusertype_roomuserdataid: builder.open_tree("roomusertype_roomuserdataid")?, mediaid_file: builder.open_tree("mediaid_file")?, + #[cfg(feature = "url_preview")] + url_previews: builder.open_tree("url_previews")?, backupid_algorithm: builder.open_tree("backupid_algorithm")?, backupid_etag: builder.open_tree("backupid_etag")?, backupkeyid_backup: builder.open_tree("backupkeyid_backup")?, diff --git a/src/main.rs b/src/main.rs index 8d242c53..232aa2cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -379,6 +379,7 @@ fn routes(config: &Config) -> Router { .ruma_route(client_server::turn_server_route) 
.ruma_route(client_server::send_event_to_device_route) .ruma_route(client_server::get_media_config_route) + .ruma_route(client_server::get_media_preview_route) .ruma_route(client_server::create_content_route) .ruma_route(client_server::get_content_route) .ruma_route(client_server::get_content_as_filename_route) diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index fc695f86..513f2bc7 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -324,6 +324,10 @@ impl Service { self.config.allow_federation } + pub fn allow_url_preview(&self) -> bool { + self.config.allow_url_preview + } + pub fn allow_room_creation(&self) -> bool { self.config.allow_room_creation } diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 75a682cb..c4621814 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -17,4 +17,24 @@ pub trait Data: Send + Sync { width: u32, height: u32, ) -> Result<(Option, Option, Vec)>; + + #[cfg(feature = "url_preview")] + fn remove_url_preview( + &self, + url: &str + ) -> Result<()>; + + #[cfg(feature = "url_preview")] + fn set_url_preview( + &self, + url: &str, + data: &super::UrlPreviewData, + timestamp: std::time::Duration, + ) -> Result<()>; + + #[cfg(feature = "url_preview")] + fn get_url_preview( + &self, + url: &str + ) -> Option; } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 0340ab49..ef0d752c 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -11,14 +11,62 @@ use tokio::{ io::{AsyncReadExt, AsyncWriteExt, BufReader}, }; +#[cfg(feature = "url_preview")] +use { + std::{ + collections::HashMap, + sync::{Arc, RwLock}, + }, + serde::Serialize, + std::time::SystemTime, + tokio::sync::Notify, +}; + pub struct FileMeta { pub content_disposition: Option, pub content_type: Option, pub file: Vec, } +#[cfg(feature = "url_preview")] +#[derive(Serialize, Default)] +pub struct UrlPreviewData { + #[serde( + skip_serializing_if = 
"Option::is_none", + rename(serialize = "og:title") + )] + pub title: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:description") + )] + pub description: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:image") + )] + pub image: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "matrix:image:size") + )] + pub image_size: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:image:width") + )] + pub image_width: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:image:height") + )] + pub image_height: Option, +} + pub struct Service { pub db: &'static dyn Data, + #[cfg(feature = "url_preview")] + pub url_preview_requests: RwLock>>, } impl Service { @@ -225,4 +273,23 @@ impl Service { Ok(None) } } + + #[cfg(feature = "url_preview")] + pub async fn get_url_preview(&self, url: &str) -> Option { + self.db.get_url_preview(url) + } + + #[cfg(feature = "url_preview")] + pub async fn remove_url_preview(&self, url: &str) -> Result<()> { + // TODO: also remove the downloaded image + self.db.remove_url_preview(url) + } + + #[cfg(feature = "url_preview")] + pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> { + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("valid system time"); + self.db.set_url_preview(url, data, now) + } } diff --git a/src/service/mod.rs b/src/service/mod.rs index 4c11bc18..c5ba84a9 100644 --- a/src/service/mod.rs +++ b/src/service/mod.rs @@ -3,6 +3,9 @@ use std::{ sync::{Arc, Mutex as StdMutex}, }; +#[cfg(feature = "url_preview")] +use std::sync::RwLock; + use lru_cache::LruCache; use tokio::sync::{broadcast, Mutex}; @@ -118,7 +121,11 @@ impl Services { account_data: account_data::Service { db }, admin: admin::Service::build(), key_backups: key_backups::Service { db }, - media: media::Service { db }, + media: 
media::Service { + db, + #[cfg(feature = "url_preview")] + url_preview_requests: RwLock::new(HashMap::new()) + }, sending: sending::Service::build(db, &config), globals: globals::Service::load(db, config)?, From 61fd9166f643a1775c133fe4fd05e81ebf6af461 Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Sat, 29 Jul 2023 00:38:58 +0200 Subject: [PATCH 02/15] Change URL preview setting from bool to a mode, and add support for an allowlist --- conduit-example.toml | 10 ++++-- debian/postinst | 10 ++++-- src/api/client_server/media.rs | 56 +++++++++++++++++++++++++--------- src/config/mod.rs | 31 +++++++++++++++++-- src/service/globals/mod.rs | 9 ++++-- 5 files changed, 90 insertions(+), 26 deletions(-) diff --git a/conduit-example.toml b/conduit-example.toml index 969c0074..260517ae 100644 --- a/conduit-example.toml +++ b/conduit-example.toml @@ -47,9 +47,6 @@ registration_token = "" allow_check_for_updates = true allow_federation = true -# Allows clients to request a URL preview -allow_url_preview = false - # Enable the display name lightning bolt on registration. enable_lightning_bolt = true @@ -69,6 +66,13 @@ trusted_servers = ["matrix.org"] address = "127.0.0.1" # This makes sure Conduit can only be reached using the reverse proxy #address = "0.0.0.0" # If Conduit is running in a container, make sure the reverse proxy (ie. Traefik) can reach it. +# possible URL preview modes: +# None: previews disabled +# All: previews for any URL allowed +# Allowlist: only domains in `url_preview_allowlist` are allowed +url_preview_mode = "None" +url_preview_allowlist = ["google.com", "youtube.com", "www.youtube.com"] + [global.well_known] # Conduit handles the /.well-known/matrix/* endpoints, making both clients and servers try to access conduit with the host # server_name and port 443 by default. 
diff --git a/debian/postinst b/debian/postinst index 8738ffe8..27afc03b 100644 --- a/debian/postinst +++ b/debian/postinst @@ -84,9 +84,6 @@ allow_check_for_updates = true # Enable the display name lightning bolt on registration. enable_lightning_bolt = true -# Allows clients to request a URL preview -allow_url_preview = false - # Servers listed here will be used to gather public keys of other servers. # Generally, copying this exactly should be enough. (Currently, Conduit doesn't # support batched key requests, so this list should only contain Synapse @@ -99,6 +96,13 @@ trusted_servers = ["matrix.org"] # # [0]: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives #log = "..." + +# possible URL preview modes: +# None: previews disabled +# All: previews for any URL allowed +# Allowlist: only domains in \`url_preview_allowlist\` are allowed +url_preview_mode = "None" +url_preview_allowlist = ["google.com", "youtube.com", "www.youtube.com"] EOF fi ;; diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 75492893..16f0d90a 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -11,9 +11,11 @@ use ruma::api::client::{ #[cfg(feature = "url_preview")] use { + crate::config::UrlPreviewMode, crate::service::media::UrlPreviewData, webpage::HTML, - std::{io::Cursor, net::IpAddr, sync::Arc, time::Duration}, + reqwest::Url, + std::{io::Cursor, net::IpAddr, sync::Arc}, tokio::sync::Notify, image::io::Reader as ImgReader, }; @@ -123,9 +125,9 @@ fn url_request_allowed(addr: &IpAddr) -> bool { } #[cfg(feature = "url_preview")] -async fn request_url_preview(url: String) -> Result { +async fn request_url_preview(url: &str) -> Result { let client = services().globals.default_client(); - let response = client.head(&url).send().await?; + let response = client.head(url).send().await?; if !response .remote_addr() @@ -151,8 +153,8 @@ async fn request_url_preview(url: String) -> Result 
{ } }; let data = match content_type { - html if html.starts_with("text/html") => download_html(&client, &url).await?, - img if img.starts_with("image/") => download_image(&client, &url).await?, + html if html.starts_with("text/html") => download_html(&client, url).await?, + img if img.starts_with("image/") => download_image(&client, url).await?, _ => { return Err(Error::BadRequest( ErrorKind::Unknown, @@ -161,14 +163,14 @@ async fn request_url_preview(url: String) -> Result { } }; - services().media.set_url_preview(&url, &data).await?; + services().media.set_url_preview(url, &data).await?; Ok(data) } #[cfg(feature = "url_preview")] -async fn get_url_preview(url: String) -> Result { - if let Some(preview) = services().media.get_url_preview(&url).await { +async fn get_url_preview(url: &str) -> Result { + if let Some(preview) = services().media.get_url_preview(url).await { return Ok(preview); } @@ -177,7 +179,7 @@ async fn get_url_preview(url: String) -> Result { .url_preview_requests .read() .unwrap() - .get(&url) + .get(url) .cloned(); match notif_opt { @@ -188,15 +190,15 @@ async fn get_url_preview(url: String) -> Result { .url_preview_requests .write() .unwrap() - .insert(url.clone(), notifier.clone()); + .insert(url.to_string(), notifier.clone()); } - let data = request_url_preview(url.clone()).await; + let data = request_url_preview(url).await; notifier.notify_waiters(); { - services().media.url_preview_requests.write().unwrap().remove(&url); + services().media.url_preview_requests.write().unwrap().remove(url); } data @@ -208,7 +210,7 @@ async fn get_url_preview(url: String) -> Result { notifier.await; services().media - .get_url_preview(&url) + .get_url_preview(url) .await .ok_or(Error::BadRequest( ErrorKind::Unknown, @@ -218,6 +220,29 @@ async fn get_url_preview(url: String) -> Result { } } +#[cfg(feature = "url_preview")] +fn url_preview_allowed(url_str: &str) -> bool { + let url = match Url::parse(url_str) { + Ok(u) => u, + Err(_) => return false, + }; + if 
["http", "https"].iter().all(|&scheme| scheme != url.scheme().to_lowercase()) { + return false; + } + match services().globals.url_preview_mode() { + UrlPreviewMode::All => true, + UrlPreviewMode::None => false, + UrlPreviewMode::Allowlist => { + match url.host_str() { + None => false, + Some(host) => { + services().globals.url_preview_allowlist().contains(&host.to_string()) + } + } + } + } +} + /// # `GET /_matrix/media/r0/preview_url` /// /// Returns URL preview. @@ -225,14 +250,15 @@ async fn get_url_preview(url: String) -> Result { pub async fn get_media_preview_route( body: Ruma, ) -> Result { - if !services().globals.allow_url_preview() { + let url = &body.url; + if !url_preview_allowed(url) { return Err(Error::BadRequest( ErrorKind::Unknown, "Previewing URL not allowed", )); } - if let Ok(preview) = get_url_preview(body.url.clone()).await { + if let Ok(preview) = get_url_preview(url).await { let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); return Ok(get_media_preview::v3::Response::from_raw_value(res)); } diff --git a/src/config/mod.rs b/src/config/mod.rs index d661a5de..cd19d38f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -13,6 +13,23 @@ mod proxy; use self::proxy::ProxyConfig; +#[derive(Clone, Copy, Debug, Deserialize)] +pub enum UrlPreviewMode { + All, + None, + Allowlist, +} + +impl ToString for UrlPreviewMode { + fn to_string(&self) -> String { + match *self { + UrlPreviewMode::All => "All".to_string(), + UrlPreviewMode::None => "None".to_string(), + UrlPreviewMode::Allowlist => "Allowlist".to_string(), + } + } +} + #[derive(Clone, Debug, Deserialize)] pub struct Config { #[serde(default = "default_address")] @@ -53,8 +70,6 @@ pub struct Config { pub allow_encryption: bool, #[serde(default = "false_fn")] pub allow_federation: bool, - #[serde(default = "false_fn")] - pub allow_url_preview: bool, #[serde(default = "true_fn")] pub allow_room_creation: bool, #[serde(default = "true_fn")] @@ -87,6 +102,11 
@@ pub struct Config { pub emergency_password: Option, + #[serde(default = "default_url_preview_mode")] + pub url_preview_mode: UrlPreviewMode, + #[serde(default = "Vec::new")] + pub url_preview_allowlist: Vec, + #[serde(flatten)] pub catchall: BTreeMap, } @@ -186,7 +206,6 @@ impl fmt::Display for Config { ), ("Allow encryption", &self.allow_encryption.to_string()), ("Allow federation", &self.allow_federation.to_string()), - ("Allow URL preview", &self.allow_url_preview.to_string()), ("Allow room creation", &self.allow_room_creation.to_string()), ( "JWT secret", @@ -235,6 +254,8 @@ impl fmt::Display for Config { }), ("Well-known server name", well_known_server.as_str()), ("Well-known client URL", &self.well_known_client()), + ("URL preview mode", &self.url_preview_mode.to_string()), + ("URL preview allowlist", &self.url_preview_allowlist.join(", ")), ]; let mut msg: String = "Active config values:\n\n".to_owned(); @@ -315,3 +336,7 @@ fn default_openid_token_ttl() -> u64 { pub fn default_default_room_version() -> RoomVersionId { RoomVersionId::V10 } + +pub fn default_url_preview_mode() -> UrlPreviewMode { + UrlPreviewMode::None +} diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 513f2bc7..38962aae 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -12,6 +12,7 @@ use futures_util::FutureExt; use hickory_resolver::TokioAsyncResolver; use hyper_util::client::legacy::connect::dns::{GaiResolver, Name as HyperName}; use reqwest::dns::{Addrs, Name, Resolve, Resolving}; +use crate::config::UrlPreviewMode; use ruma::{ api::{client::sync::sync_events, federation::discovery::ServerSigningKeys}, DeviceId, RoomVersionId, ServerName, UserId, @@ -324,8 +325,12 @@ impl Service { self.config.allow_federation } - pub fn allow_url_preview(&self) -> bool { - self.config.allow_url_preview + pub fn url_preview_mode(&self) -> UrlPreviewMode { + self.config.url_preview_mode + } + + pub fn url_preview_allowlist(&self) -> &Vec { + 
&self.config.url_preview_allowlist } pub fn allow_room_creation(&self) -> bool { From 8f147379ea8317315a5a55ea9e2d3f57fd3fbabd Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Sat, 29 Jul 2023 23:15:16 +0200 Subject: [PATCH 03/15] Drop mode and add special allowlist entries '*' and 'default' --- conduit-example.toml | 7 +----- debian/postinst | 7 +----- src/api/client_server/media.rs | 41 ++++++++++++++++++++++++---------- src/config/mod.rs | 24 -------------------- src/service/globals/mod.rs | 5 ----- 5 files changed, 31 insertions(+), 53 deletions(-) diff --git a/conduit-example.toml b/conduit-example.toml index 260517ae..f35a3d63 100644 --- a/conduit-example.toml +++ b/conduit-example.toml @@ -66,12 +66,7 @@ trusted_servers = ["matrix.org"] address = "127.0.0.1" # This makes sure Conduit can only be reached using the reverse proxy #address = "0.0.0.0" # If Conduit is running in a container, make sure the reverse proxy (ie. Traefik) can reach it. -# possible URL preview modes: -# None: previews disabled -# All: previews for any URL allowed -# Allowlist: only domains in `url_preview_allowlist` are allowed -url_preview_mode = "None" -url_preview_allowlist = ["google.com", "youtube.com", "www.youtube.com"] +url_preview_allowlist = [] [global.well_known] # Conduit handles the /.well-known/matrix/* endpoints, making both clients and servers try to access conduit with the host diff --git a/debian/postinst b/debian/postinst index 27afc03b..cf8937ff 100644 --- a/debian/postinst +++ b/debian/postinst @@ -97,12 +97,7 @@ trusted_servers = ["matrix.org"] # [0]: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives #log = "..." 
-# possible URL preview modes: -# None: previews disabled -# All: previews for any URL allowed -# Allowlist: only domains in \`url_preview_allowlist\` are allowed -url_preview_mode = "None" -url_preview_allowlist = ["google.com", "youtube.com", "www.youtube.com"] +url_preview_allowlist = [] EOF fi ;; diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 16f0d90a..41d6b9be 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -11,7 +11,6 @@ use ruma::api::client::{ #[cfg(feature = "url_preview")] use { - crate::config::UrlPreviewMode, crate::service::media::UrlPreviewData, webpage::HTML, reqwest::Url, @@ -190,7 +189,7 @@ async fn get_url_preview(url: &str) -> Result { .url_preview_requests .write() .unwrap() - .insert(url.to_string(), notifier.clone()); + .insert(url.to_owned(), notifier.clone()); } let data = request_url_preview(url).await; @@ -222,6 +221,13 @@ async fn get_url_preview(url: &str) -> Result { #[cfg(feature = "url_preview")] fn url_preview_allowed(url_str: &str) -> bool { + const DEFAULT_ALLOWLIST: &[&str] = &[ + "matrix.org", + "mastodon.social", + "youtube.com", + "wikipedia.org", + ]; + let url = match Url::parse(url_str) { Ok(u) => u, Err(_) => return false, @@ -229,18 +235,29 @@ fn url_preview_allowed(url_str: &str) -> bool { if ["http", "https"].iter().all(|&scheme| scheme != url.scheme().to_lowercase()) { return false; } - match services().globals.url_preview_mode() { - UrlPreviewMode::All => true, - UrlPreviewMode::None => false, - UrlPreviewMode::Allowlist => { - match url.host_str() { - None => false, - Some(host) => { - services().globals.url_preview_allowlist().contains(&host.to_string()) - } - } + let mut host = match url.host_str() { + None => return false, + Some(h) => h.to_lowercase(), + }; + + let allowlist = services().globals.url_preview_allowlist(); + if allowlist.contains(&"*".to_owned()) { + return true; + } + while !host.is_empty() { + if allowlist.contains(&host) { + 
return true; + } + if allowlist.contains(&"default".to_owned()) && DEFAULT_ALLOWLIST.contains(&host.as_str()) { + return true; + } + /* also check higher level domains, so that e.g. `en.m.wikipedia.org` is matched by `wikipedia.org` on allowlist. */ + host = match host.split_once('.') { + None => return false, + Some((_, domain)) => domain.to_owned(), } } + false } /// # `GET /_matrix/media/r0/preview_url` diff --git a/src/config/mod.rs b/src/config/mod.rs index cd19d38f..b8eef648 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -13,23 +13,6 @@ mod proxy; use self::proxy::ProxyConfig; -#[derive(Clone, Copy, Debug, Deserialize)] -pub enum UrlPreviewMode { - All, - None, - Allowlist, -} - -impl ToString for UrlPreviewMode { - fn to_string(&self) -> String { - match *self { - UrlPreviewMode::All => "All".to_string(), - UrlPreviewMode::None => "None".to_string(), - UrlPreviewMode::Allowlist => "Allowlist".to_string(), - } - } -} - #[derive(Clone, Debug, Deserialize)] pub struct Config { #[serde(default = "default_address")] @@ -102,8 +85,6 @@ pub struct Config { pub emergency_password: Option, - #[serde(default = "default_url_preview_mode")] - pub url_preview_mode: UrlPreviewMode, #[serde(default = "Vec::new")] pub url_preview_allowlist: Vec, @@ -254,7 +235,6 @@ impl fmt::Display for Config { }), ("Well-known server name", well_known_server.as_str()), ("Well-known client URL", &self.well_known_client()), - ("URL preview mode", &self.url_preview_mode.to_string()), ("URL preview allowlist", &self.url_preview_allowlist.join(", ")), ]; @@ -336,7 +316,3 @@ fn default_openid_token_ttl() -> u64 { pub fn default_default_room_version() -> RoomVersionId { RoomVersionId::V10 } - -pub fn default_url_preview_mode() -> UrlPreviewMode { - UrlPreviewMode::None -} diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 38962aae..e1762054 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -12,7 +12,6 @@ use futures_util::FutureExt; 
use hickory_resolver::TokioAsyncResolver; use hyper_util::client::legacy::connect::dns::{GaiResolver, Name as HyperName}; use reqwest::dns::{Addrs, Name, Resolve, Resolving}; -use crate::config::UrlPreviewMode; use ruma::{ api::{client::sync::sync_events, federation::discovery::ServerSigningKeys}, DeviceId, RoomVersionId, ServerName, UserId, @@ -325,10 +324,6 @@ impl Service { self.config.allow_federation } - pub fn url_preview_mode(&self) -> UrlPreviewMode { - self.config.url_preview_mode - } - pub fn url_preview_allowlist(&self) -> &Vec { &self.config.url_preview_allowlist } From d6e3d9aa8a8253b33a4bd847328d542211076930 Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Sun, 30 Jul 2023 13:02:34 +0200 Subject: [PATCH 04/15] Drop feature flag, as it's no longer required --- Cargo.lock | 596 ++++++++++++++++++++++++++++---- Cargo.toml | 3 +- src/api/client_server/media.rs | 21 +- src/database/key_value/media.rs | 8 +- src/database/mod.rs | 2 - src/service/media/data.rs | 3 - src/service/media/mod.rs | 25 +- src/service/mod.rs | 8 +- 8 files changed, 533 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea84fc09..8115f3ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -99,7 +99,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -110,7 +110,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -328,7 +328,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - 
"syn", + "syn 2.0.66", ] [[package]] @@ -400,9 +400,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" dependencies = [ "jobserver", "libc", @@ -443,9 +443,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.4" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" dependencies = [ "clap_builder", "clap_derive", @@ -453,9 +453,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.2" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" dependencies = [ "anstyle", "clap_lex", @@ -463,21 +463,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.4" +version = "4.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" [[package]] name = "color_quant" @@ -542,6 +542,7 @@ dependencies = [ "tracing-opentelemetry", "tracing-subscriber", "url", + "webpage", ] [[package]] 
@@ -661,7 +662,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -727,6 +728,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "ed25519" version = "2.2.3" @@ -767,7 +779,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -852,6 +864,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -892,7 +914,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1136,6 +1158,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "http" version = "0.2.12" @@ -1181,12 +1217,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", 
"http 1.1.0", "http-body 1.0.0", "pin-project-lite", @@ -1194,9 +1230,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "d0e7a4dd27b9476dc40cb050d3632d3bba3a70ddbff012285f7f8559a1e7e545" [[package]] name = "httpdate" @@ -1298,6 +1334,124 @@ dependencies = [ "tracing", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name 
= "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "idna" version = "0.4.0" @@ -1310,12 +1464,14 @@ dependencies = [ [[package]] name = "idna" -version = "0.5.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", + "smallvec", + "utf8_iter", ] [[package]] @@ -1531,6 +1687,12 @@ version = "0.5.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -1558,20 +1720,52 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" dependencies = [ "cc", "libc", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -1595,9 +1789,9 @@ checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "mime" @@ -1632,6 +1826,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nix" version = "0.28.0" @@ -1710,9 +1910,9 @@ dependencies = [ [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] @@ -1876,7 +2076,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1911,6 +2111,44 @@ dependencies = [ "zigzag", ] +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -1928,7 +2166,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1990,6 +2228,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-crate" version = "3.1.0" @@ -2016,7 +2260,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", "version_check", "yansi", ] @@ -2041,7 +2285,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2091,9 +2335,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" dependencies = [ "bitflags 2.5.0", ] @@ -2111,14 +2355,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", ] [[package]] @@ -2132,13 +2376,13 @@ dependencies = [ 
[[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -2149,9 +2393,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" @@ -2223,7 +2467,7 @@ dependencies = [ [[package]] name = "ruma" version = "0.10.1" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "assign", "js_int", @@ -2244,7 +2488,7 @@ dependencies = [ [[package]] name = "ruma-appservice-api" version = "0.10.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "js_int", "ruma-common", @@ -2256,7 +2500,7 @@ dependencies = [ [[package]] name = "ruma-client-api" version = "0.18.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "as_variant", "assign", @@ -2279,7 +2523,7 @@ dependencies = [ [[package]] name = "ruma-common" version = "0.13.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = 
"git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "as_variant", "base64 0.22.1", @@ -2309,7 +2553,7 @@ dependencies = [ [[package]] name = "ruma-events" version = "0.28.1" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "as_variant", "indexmap 2.2.6", @@ -2331,7 +2575,7 @@ dependencies = [ [[package]] name = "ruma-federation-api" version = "0.9.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "js_int", "ruma-common", @@ -2343,7 +2587,7 @@ dependencies = [ [[package]] name = "ruma-identifiers-validation" version = "0.9.5" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "js_int", "thiserror", @@ -2352,7 +2596,7 @@ dependencies = [ [[package]] name = "ruma-identity-service-api" version = "0.9.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "js_int", "ruma-common", @@ -2362,7 +2606,7 @@ dependencies = [ [[package]] name = "ruma-macros" version = "0.13.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "once_cell", "proc-macro-crate", @@ -2370,14 +2614,14 @@ dependencies = [ "quote", "ruma-identifiers-validation", "serde", - "syn", + "syn 2.0.66", "toml", ] [[package]] name = "ruma-push-gateway-api" version = "0.9.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = 
"git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "js_int", "ruma-common", @@ -2389,7 +2633,7 @@ dependencies = [ [[package]] name = "ruma-server-util" version = "0.3.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "headers", "ruma-common", @@ -2400,7 +2644,7 @@ dependencies = [ [[package]] name = "ruma-signatures" version = "0.15.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "base64 0.22.1", "ed25519-dalek", @@ -2416,7 +2660,7 @@ dependencies = [ [[package]] name = "ruma-state-res" version = "0.11.0" -source = "git+https://github.com/ruma/ruma#ef40b184b7410a93e933b4ad719a72aea1bdd20e" +source = "git+https://github.com/ruma/ruma#c21817436979acbe66d43064498920a6d289b562" dependencies = [ "itertools", "js_int", @@ -2665,7 +2909,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2820,6 +3064,12 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2861,6 +3111,38 @@ dependencies = [ "der", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = 
[ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "subslice" version = "0.2.3" @@ -2876,6 +3158,17 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.66" @@ -2899,6 +3192,28 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.61" @@ -2916,7 +3231,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -2989,6 +3304,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -3040,7 +3365,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -3142,7 +3467,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.11", + "winnow 0.6.13", ] [[package]] @@ -3242,7 +3567,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -3390,12 +3715,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" dependencies = [ "form_urlencoded", - "idna 0.5.0", + "idna 1.0.0", "percent-encoding", "serde", ] @@ -3406,6 +3731,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + 
[[package]] name = "uuid" version = "1.8.0" @@ -3469,7 +3812,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.66", "wasm-bindgen-shared", ] @@ -3503,7 +3846,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3534,6 +3877,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpage" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8598785beeb5af95abe95e7bb20c7e747d1188347080d6811d5a56d2b9a5f368" +dependencies = [ + "html5ever", + "markup5ever_rcdom", + "serde_json", +] + [[package]] name = "weezl" version = "0.1.8" @@ -3724,9 +4078,9 @@ dependencies = [ [[package]] name = "winnow" -version = "0.6.11" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c52728401e1dc672a56e81e593e912aa54c78f40246869f78359a2bf24d29d" +checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" dependencies = [ "memchr", ] @@ -3751,6 +4105,29 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "xml5ever" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" +dependencies = [ + "log", + "mac", + "markup5ever", +] + [[package]] name = "yansi" version = "1.0.1" @@ -3763,6 +4140,30 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bfe269e7b803a5e8e20cbd97860e136529cd83bf2c9c6d37b142467e7e1f051f" +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.34" @@ -3780,7 +4181,28 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", ] [[package]] @@ -3789,6 +4211,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "zigzag" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 766cf49c..d493383e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ tikv-jemallocator = { version = "0.5.0", features = [ sd-notify = { version = "0.4.1", optional = true } -webpage = { version = "1.6", default-features = false, optional = true } +webpage = { version = "1.6", default-features = false } # Used for matrix spec type definitions and helpers [dependencies.ruma] @@ -188,7 +188,6 @@ conduit_bin = ["axum"] jemalloc = ["tikv-jemallocator"] sqlite = ["parking_lot", "rusqlite", "tokio/signal"] systemd = ["sd-notify"] -url_preview = ["webpage"] [[bin]] name = "conduit" diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 41d6b9be..c6381183 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -1,6 +1,6 @@ use std::time::Duration; -use crate::{service::media::FileMeta, services, utils, Error, Result, Ruma}; +use crate::{service::media::{FileMeta, UrlPreviewData}, services, utils, Error, Result, Ruma}; use ruma::api::client::{ error::{ErrorKind, RetryAfter}, media::{ @@ -9,9 +9,7 @@ use ruma::api::client::{ }, }; -#[cfg(feature = "url_preview")] use { - crate::service::media::UrlPreviewData, webpage::HTML, reqwest::Url, std::{io::Cursor, net::IpAddr, sync::Arc}, @@ -32,7 +30,6 @@ pub async fn get_media_config_route( }) } -#[cfg(feature = "url_preview")] async fn download_image( client: &reqwest::Client, url: &str, @@ -64,7 +61,6 @@ async fn download_image( }) } -#[cfg(feature = "url_preview")] async fn download_html( client: &reqwest::Client, url: &str, @@ -103,7 +99,6 @@ async fn download_html( Ok(data) } -#[cfg(feature = "url_preview")] fn url_request_allowed(addr: &IpAddr) -> bool { // could be implemented with reqwest when it supports IP filtering: // 
https://github.com/seanmonstar/reqwest/issues/1515 @@ -123,7 +118,6 @@ fn url_request_allowed(addr: &IpAddr) -> bool { } } -#[cfg(feature = "url_preview")] async fn request_url_preview(url: &str) -> Result { let client = services().globals.default_client(); let response = client.head(url).send().await?; @@ -167,7 +161,6 @@ async fn request_url_preview(url: &str) -> Result { Ok(data) } -#[cfg(feature = "url_preview")] async fn get_url_preview(url: &str) -> Result { if let Some(preview) = services().media.get_url_preview(url).await { return Ok(preview); @@ -219,7 +212,6 @@ async fn get_url_preview(url: &str) -> Result { } } -#[cfg(feature = "url_preview")] fn url_preview_allowed(url_str: &str) -> bool { const DEFAULT_ALLOWLIST: &[&str] = &[ "matrix.org", @@ -263,7 +255,6 @@ fn url_preview_allowed(url_str: &str) -> bool { /// # `GET /_matrix/media/r0/preview_url` /// /// Returns URL preview. -#[cfg(feature = "url_preview")] pub async fn get_media_preview_route( body: Ruma, ) -> Result { @@ -288,16 +279,6 @@ pub async fn get_media_preview_route( )) } -#[cfg(not(feature = "url_preview"))] -pub async fn get_media_preview_route( - _body: Ruma, -) -> Result { - Err(Error::BadRequest( - ErrorKind::Forbidden, - "URL preview not implemented", - )) -} - /// # `POST /_matrix/media/r0/upload` /// /// Permanently save media in the server. 
diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 2ad5e3fc..6d05a9f4 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,9 +1,6 @@ use ruma::api::client::error::ErrorKind; -use crate::{database::KeyValueDatabase, service, utils, Error, Result}; - -#[cfg(feature = "url_preview")] -use crate::service::media::UrlPreviewData; +use crate::{database::KeyValueDatabase, service::{self, media::UrlPreviewData}, utils, Error, Result}; impl service::media::Data for KeyValueDatabase { fn create_file_metadata( @@ -83,12 +80,10 @@ impl service::media::Data for KeyValueDatabase { Ok((content_disposition, content_type, key)) } - #[cfg(feature = "url_preview")] fn remove_url_preview(&self, url: &str) -> Result<()> { self.url_previews.remove(url.as_bytes()) } - #[cfg(feature = "url_preview")] fn set_url_preview(&self, url: &str, data: &UrlPreviewData, timestamp: std::time::Duration) -> Result<()> { let mut value = Vec::::new(); value.extend_from_slice(×tamp.as_secs().to_be_bytes()); @@ -123,7 +118,6 @@ impl service::media::Data for KeyValueDatabase { self.url_previews.insert(url.as_bytes(), &value) } - #[cfg(feature = "url_preview")] fn get_url_preview(&self, url: &str) -> Option { let values = self.url_previews.get(url.as_bytes()).ok()??; diff --git a/src/database/mod.rs b/src/database/mod.rs index 40446e9c..35dbb202 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -146,7 +146,6 @@ pub struct KeyValueDatabase { //pub media: media::Media, pub(super) mediaid_file: Arc, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType - #[cfg(feature = "url_preview")] pub(super) url_previews: Arc, //pub key_backups: key_backups::KeyBackups, pub(super) backupid_algorithm: Arc, // BackupId = UserId + Version(Count) @@ -364,7 +363,6 @@ impl KeyValueDatabase { roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?, roomusertype_roomuserdataid: 
builder.open_tree("roomusertype_roomuserdataid")?, mediaid_file: builder.open_tree("mediaid_file")?, - #[cfg(feature = "url_preview")] url_previews: builder.open_tree("url_previews")?, backupid_algorithm: builder.open_tree("backupid_algorithm")?, backupid_etag: builder.open_tree("backupid_etag")?, diff --git a/src/service/media/data.rs b/src/service/media/data.rs index c4621814..6e13cad0 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -18,13 +18,11 @@ pub trait Data: Send + Sync { height: u32, ) -> Result<(Option, Option, Vec)>; - #[cfg(feature = "url_preview")] fn remove_url_preview( &self, url: &str ) -> Result<()>; - #[cfg(feature = "url_preview")] fn set_url_preview( &self, url: &str, @@ -32,7 +30,6 @@ pub trait Data: Send + Sync { timestamp: std::time::Duration, ) -> Result<()>; - #[cfg(feature = "url_preview")] fn get_url_preview( &self, url: &str diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index ef0d752c..d94184ca 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -1,5 +1,10 @@ mod data; -use std::io::Cursor; +use std::{ + io::Cursor, + collections::HashMap, + sync::{Arc, RwLock}, + time::SystemTime, +}; pub use data::Data; @@ -9,18 +14,9 @@ use image::imageops::FilterType; use tokio::{ fs::File, io::{AsyncReadExt, AsyncWriteExt, BufReader}, + sync::Notify, }; - -#[cfg(feature = "url_preview")] -use { - std::{ - collections::HashMap, - sync::{Arc, RwLock}, - }, - serde::Serialize, - std::time::SystemTime, - tokio::sync::Notify, -}; +use serde::Serialize; pub struct FileMeta { pub content_disposition: Option, @@ -28,7 +24,6 @@ pub struct FileMeta { pub file: Vec, } -#[cfg(feature = "url_preview")] #[derive(Serialize, Default)] pub struct UrlPreviewData { #[serde( @@ -65,7 +60,6 @@ pub struct UrlPreviewData { pub struct Service { pub db: &'static dyn Data, - #[cfg(feature = "url_preview")] pub url_preview_requests: RwLock>>, } @@ -274,18 +268,15 @@ impl Service { } } - #[cfg(feature = 
"url_preview")] pub async fn get_url_preview(&self, url: &str) -> Option { self.db.get_url_preview(url) } - #[cfg(feature = "url_preview")] pub async fn remove_url_preview(&self, url: &str) -> Result<()> { // TODO: also remove the downloaded image self.db.remove_url_preview(url) } - #[cfg(feature = "url_preview")] pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> { let now = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) diff --git a/src/service/mod.rs b/src/service/mod.rs index c5ba84a9..6db8113f 100644 --- a/src/service/mod.rs +++ b/src/service/mod.rs @@ -1,11 +1,8 @@ use std::{ collections::{BTreeMap, HashMap}, - sync::{Arc, Mutex as StdMutex}, + sync::{Arc, Mutex as StdMutex, RwLock as StdRwLock}, }; -#[cfg(feature = "url_preview")] -use std::sync::RwLock; - use lru_cache::LruCache; use tokio::sync::{broadcast, Mutex}; @@ -123,8 +120,7 @@ impl Services { key_backups: key_backups::Service { db }, media: media::Service { db, - #[cfg(feature = "url_preview")] - url_preview_requests: RwLock::new(HashMap::new()) + url_preview_requests: StdRwLock::new(HashMap::new()) }, sending: sending::Service::build(db, &config), From fc42243ec27f3aac757dacab23c84f3c61653467 Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Sun, 30 Jul 2023 23:57:03 +0200 Subject: [PATCH 05/15] Use is_global checks from Rust ipaddr crate --- src/api/client_server/media.rs | 44 +++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index c6381183..eb7298e0 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -103,18 +103,50 @@ fn url_request_allowed(addr: &IpAddr) -> bool { // could be implemented with reqwest when it supports IP filtering: // https://github.com/seanmonstar/reqwest/issues/1515 - // TODO: simplify to .is_global() when it has been stabilized + // These checks have been taken from the Rust 
core/net/ipaddr.rs crate, + // IpAddr::V4.is_global() and IpAddr::V6.is_global(), as .is_global is not + // yet stabilized. TODO: Once this is stable, this match can be simplified. match addr { IpAddr::V4(ip4) => { - !(ip4.is_private() + !(ip4.octets()[0] == 0 // "This network" + || ip4.is_private() + || (ip4.octets()[0] == 100 && (ip4.octets()[1] & 0b1100_0000 == 0b0100_0000)) // is_shared() || ip4.is_loopback() || ip4.is_link_local() - || ip4.is_multicast() - || ip4.is_broadcast() + // addresses reserved for future protocols (`192.0.0.0/24`) + || (ip4.octets()[0] == 192 && ip4.octets()[1] == 0 && ip4.octets()[2] == 0) || ip4.is_documentation() - || ip4.is_unspecified()) + || (ip4.octets()[0] == 198 && (ip4.octets()[1] & 0xfe) == 18) // is_benchmarking() + || (ip4.octets()[0] & 240 == 240 && !ip4.is_broadcast()) // is_reserved() + || ip4.is_broadcast()) + } + IpAddr::V6(ip6) => { + !(ip6.is_unspecified() + || ip6.is_loopback() + // IPv4-mapped Address (`::ffff:0:0/96`) + || matches!(ip6.segments(), [0, 0, 0, 0, 0, 0xffff, _, _]) + // IPv4-IPv6 Translat. 
(`64:ff9b:1::/48`) + || matches!(ip6.segments(), [0x64, 0xff9b, 1, _, _, _, _, _]) + // Discard-Only Address Block (`100::/64`) + || matches!(ip6.segments(), [0x100, 0, 0, 0, _, _, _, _]) + // IETF Protocol Assignments (`2001::/23`) + || (matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if b < 0x200) + && !( + // Port Control Protocol Anycast (`2001:1::1`) + u128::from_be_bytes(ip6.octets()) == 0x2001_0001_0000_0000_0000_0000_0000_0001 + // Traversal Using Relays around NAT Anycast (`2001:1::2`) + || u128::from_be_bytes(ip6.octets()) == 0x2001_0001_0000_0000_0000_0000_0000_0002 + // AMT (`2001:3::/32`) + || matches!(ip6.segments(), [0x2001, 3, _, _, _, _, _, _]) + // AS112-v6 (`2001:4:112::/48`) + || matches!(ip6.segments(), [0x2001, 4, 0x112, _, _, _, _, _]) + // ORCHIDv2 (`2001:20::/28`) + || matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if b >= 0x20 && b <= 0x2F) + )) + || ((ip6.segments()[0] == 0x2001) && (ip6.segments()[1] == 0xdb8)) // is_documentation() + || ((ip6.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local() + || ((ip6.segments()[0] & 0xffc0) == 0xfe80)) // is_unicast_link_local } - IpAddr::V6(ip6) => !(ip6.is_loopback() || ip6.is_multicast() || ip6.is_unspecified()), } } From 2d9248ed3be923390ca208bb2a491266c00333f1 Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Sat, 19 Aug 2023 23:39:35 +0200 Subject: [PATCH 06/15] Simplify the request limiting --- src/api/client_server/media.rs | 57 +++++++++------------------------- src/service/media/mod.rs | 4 +-- src/service/mod.rs | 2 +- 3 files changed, 17 insertions(+), 46 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index eb7298e0..8b37adf0 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -13,7 +13,6 @@ use { webpage::HTML, reqwest::Url, std::{io::Cursor, net::IpAddr, sync::Arc}, - tokio::sync::Notify, image::io::Reader as ImgReader, }; @@ -198,49 +197,21 @@ async fn get_url_preview(url: &str) -> Result { 
return Ok(preview); } - let notif_opt = services() - .media - .url_preview_requests - .read() - .unwrap() - .get(url) - .cloned(); + // ensure that only one request is made per URL + let mutex_request = Arc::clone( + services() + .media + .url_preview_mutex + .write() + .unwrap() + .entry(url.to_owned()) + .or_default(), + ); + let _request_lock = mutex_request.lock().await; - match notif_opt { - None => { - let notifier = Arc::new(Notify::new()); - { - services().media - .url_preview_requests - .write() - .unwrap() - .insert(url.to_owned(), notifier.clone()); - } - - let data = request_url_preview(url).await; - - notifier.notify_waiters(); - - { - services().media.url_preview_requests.write().unwrap().remove(url); - } - - data - } - Some(notifier) => { - // wait until being notified that request is finished - let notifier = notifier.clone(); - let notifier = notifier.notified(); - notifier.await; - - services().media - .get_url_preview(url) - .await - .ok_or(Error::BadRequest( - ErrorKind::Unknown, - "No Preview available", - )) - } + match services().media.get_url_preview(url).await { + Some(preview) => Ok(preview), + None => request_url_preview(url).await } } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index d94184ca..3b02b919 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -14,7 +14,7 @@ use image::imageops::FilterType; use tokio::{ fs::File, io::{AsyncReadExt, AsyncWriteExt, BufReader}, - sync::Notify, + sync::Mutex, }; use serde::Serialize; @@ -60,7 +60,7 @@ pub struct UrlPreviewData { pub struct Service { pub db: &'static dyn Data, - pub url_preview_requests: RwLock>>, + pub url_preview_mutex: RwLock>>>, } impl Service { diff --git a/src/service/mod.rs b/src/service/mod.rs index 6db8113f..ec6b69a4 100644 --- a/src/service/mod.rs +++ b/src/service/mod.rs @@ -120,7 +120,7 @@ impl Services { key_backups: key_backups::Service { db }, media: media::Service { db, - url_preview_requests: StdRwLock::new(HashMap::new()) + 
url_preview_mutex: StdRwLock::new(HashMap::new()), }, sending: sending::Service::build(db, &config), From 1c2baef9e1c5d8fc6bb7844dcbdc7ac65cdcca9b Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Sun, 16 Jun 2024 13:21:08 +0200 Subject: [PATCH 07/15] Bump webpage version --- Cargo.lock | 125 ++++++++++++++++++++++++++++------------------------- Cargo.toml | 2 +- 2 files changed, 68 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8115f3ca..d44c49ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -99,7 +99,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -110,7 +110,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -328,7 +328,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.66", + "syn", ] [[package]] @@ -470,7 +470,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -662,7 +662,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -736,7 +736,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -779,7 +779,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -914,7 +914,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1160,16 +1160,16 @@ dependencies = [ [[package]] name = "html5ever" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" dependencies = [ "log", "mac", "markup5ever", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -1449,7 +1449,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1742,9 +1742,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "markup5ever" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" dependencies = [ "log", "phf", @@ -1756,9 +1756,9 @@ dependencies = [ [[package]] name = "markup5ever_rcdom" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" +checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" dependencies = [ "html5ever", "markup5ever", @@ -2076,7 +2076,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -2113,21 +2113,21 @@ dependencies = [ [[package]] name = "phf" -version = "0.10.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "phf_shared", + "phf_shared 0.11.2", ] [[package]] name = "phf_codegen" -version = "0.10.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +checksum = 
"e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.2", + "phf_shared 0.11.2", ] [[package]] @@ -2136,7 +2136,17 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" dependencies = [ - "phf_shared", + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", "rand", ] @@ -2149,6 +2159,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -2166,7 +2185,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -2260,7 +2279,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "version_check", "yansi", ] @@ -2285,7 +2304,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -2614,7 +2633,7 @@ dependencies = [ "quote", "ruma-identifiers-validation", "serde", - "syn 2.0.66", + "syn", "toml", ] @@ -2909,7 +2928,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -3126,7 +3145,7 @@ dependencies = [ "new_debug_unreachable", "once_cell", "parking_lot", - "phf_shared", + "phf_shared 0.10.0", "precomputed-hash", "serde", ] @@ -3137,8 
+3156,8 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.10.0", + "phf_shared 0.10.0", "proc-macro2", "quote", ] @@ -3158,17 +3177,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.66" @@ -3200,7 +3208,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -3231,7 +3239,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -3365,7 +3373,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -3567,7 +3575,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -3812,7 +3820,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn", "wasm-bindgen-shared", ] @@ -3846,7 +3854,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3879,13 +3887,14 @@ dependencies = [ [[package]] name = "webpage" -version = "1.6.0" +version = "2.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8598785beeb5af95abe95e7bb20c7e747d1188347080d6811d5a56d2b9a5f368" +checksum = "70862efc041d46e6bbaa82bb9c34ae0596d090e86cbd14bd9e93b36ee6802eac" dependencies = [ "html5ever", "markup5ever_rcdom", "serde_json", + "url", ] [[package]] @@ -4119,9 +4128,9 @@ checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "xml5ever" -version = "0.17.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" +checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69" dependencies = [ "log", "mac", @@ -4160,7 +4169,7 @@ checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "synstructure", ] @@ -4181,7 +4190,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -4201,7 +4210,7 @@ checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "synstructure", ] @@ -4230,7 +4239,7 @@ checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d493383e..ad87827d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ tikv-jemallocator = { version = "0.5.0", features = [ sd-notify = { version = "0.4.1", optional = true } -webpage = { version = "1.6", default-features = false } +webpage = { version = "2.0", default-features = false } # Used for matrix spec type definitions and helpers [dependencies.ruma] From 839498ada74bacba73347c109321531874eb488f Mon Sep 17 00:00:00 2001 From: Steven Vergenz 
<1882376+stevenvergenz@users.noreply.github.com> Date: Wed, 30 Oct 2024 16:48:38 -0700 Subject: [PATCH 08/15] Parse URL only once --- src/api/client_server/media.rs | 74 ++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 77db7f83..fcbbdd15 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -174,20 +174,22 @@ fn url_request_allowed(addr: &IpAddr) -> bool { } } -async fn request_url_preview(url: &str) -> Result { - let client = services().globals.default_client(); - let response = client.head(url).send().await?; - - if !response - .remote_addr() - .map_or(false, |a| url_request_allowed(&a.ip())) - { - return Err(Error::BadRequest( - ErrorKind::Unknown, - "Requesting from this address forbidden", - )); +async fn request_url_preview(url: &Url) -> Result { + // resolve host to IP to ensure it's not a local IP (host guaranteed to not be None) + let dns_resolver = services().globals.dns_resolver(); + match dns_resolver.lookup_ip(url.host_str().unwrap()).await { + Err(_) => { + return Err(Error::BadServerResponse("Failed to resolve media preview host")); + }, + Ok(lookup) if lookup.iter().any(|ip| !url_request_allowed(&ip)) => { + return Err(Error::BadRequest(ErrorKind::Unknown, "Requesting from this address forbidden")); + }, + Ok(_) => { }, } + let client = services().globals.default_client(); + let response = client.head(url.as_str()).send().await?; + let content_type = match response .headers() .get(reqwest::header::CONTENT_TYPE) @@ -202,8 +204,8 @@ async fn request_url_preview(url: &str) -> Result { } }; let data = match content_type { - html if html.starts_with("text/html") => download_html(&client, url).await?, - img if img.starts_with("image/") => download_image(&client, url).await?, + html if html.starts_with("text/html") => download_html(&client, url.as_str()).await?, + img if img.starts_with("image/") => 
download_image(&client, url.as_str()).await?, _ => { return Err(Error::BadRequest( ErrorKind::Unknown, @@ -212,13 +214,13 @@ async fn request_url_preview(url: &str) -> Result { } }; - services().media.set_url_preview(url, &data).await?; + services().media.set_url_preview(url.as_str(), &data).await?; Ok(data) } -async fn get_url_preview(url: &str) -> Result { - if let Some(preview) = services().media.get_url_preview(url).await { +async fn get_url_preview(url: &Url) -> Result { + if let Some(preview) = services().media.get_url_preview(url.as_str()).await { return Ok(preview); } @@ -229,18 +231,18 @@ async fn get_url_preview(url: &str) -> Result { .url_preview_mutex .write() .unwrap() - .entry(url.to_owned()) + .entry(url.as_str().to_owned()) .or_default(), ); let _request_lock = mutex_request.lock().await; - match services().media.get_url_preview(url).await { + match services().media.get_url_preview(url.as_str()).await { Some(preview) => Ok(preview), None => request_url_preview(url).await } } -fn url_preview_allowed(url_str: &str) -> bool { +fn url_preview_allowed(url: &Url) -> bool { const DEFAULT_ALLOWLIST: &[&str] = &[ "matrix.org", "mastodon.social", @@ -248,13 +250,6 @@ fn url_preview_allowed(url_str: &str) -> bool { "wikipedia.org", ]; - let url = match Url::parse(url_str) { - Ok(u) => u, - Err(_) => return false, - }; - if ["http", "https"].iter().all(|&scheme| scheme != url.scheme().to_lowercase()) { - return false; - } let mut host = match url.host_str() { None => return false, Some(h) => h.to_lowercase(), @@ -286,15 +281,34 @@ fn url_preview_allowed(url_str: &str) -> bool { pub async fn get_media_preview_route( body: Ruma, ) -> Result { - let url = &body.url; - if !url_preview_allowed(url) { + let url = match Url::parse(&body.url) { + Err(_) => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Not a valid URL", + )); + }, + Ok(u) + if u.scheme() != "http" + && u.scheme() != "https" + || u.host().is_none() + => { + return Err(Error::BadRequest( + 
ErrorKind::Unknown, + "Not a valid HTTP URL", + )); + }, + Ok(url) => url, + }; + + if !url_preview_allowed(&url) { return Err(Error::BadRequest( ErrorKind::Unknown, "Previewing URL not allowed", )); } - if let Ok(preview) = get_url_preview(url).await { + if let Ok(preview) = get_url_preview(&url).await { let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); return Ok(get_media_preview::v3::Response::from_raw_value(res)); } From c8d5b05855eaa3dcd775cbb9d548b683156d5be6 Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Wed, 30 Oct 2024 21:16:24 -0700 Subject: [PATCH 09/15] Preview titles/images required --- src/api/client_server/media.rs | 6 +++--- src/database/key_value/media.rs | 24 ++++++------------------ src/service/media/mod.rs | 6 ++---- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index fcbbdd15..f6022c3e 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -77,7 +77,7 @@ async fn download_image( }; Ok(UrlPreviewData { - image: Some(mxc), + image: mxc, image_size: Some(image.len()), image_width: width, image_height: height, @@ -118,7 +118,7 @@ async fn download_html( let props = html.opengraph.properties; /* use OpenGraph title/description, but fall back to HTML if not available */ - data.title = props.get("title").cloned().or(html.title); + data.title = props.get("title").cloned().or(html.title).unwrap_or(String::from(url)); data.description = props.get("description").cloned().or(html.description); Ok(data) } @@ -192,7 +192,7 @@ async fn request_url_preview(url: &Url) -> Result { let content_type = match response .headers() - .get(reqwest::header::CONTENT_TYPE) + .get(CONTENT_TYPE) .and_then(|x| x.to_str().ok()) { Some(ct) => ct, diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index cd67a583..165eebb3 100644 --- 
a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -78,10 +78,7 @@ impl service::media::Data for KeyValueDatabase { value.extend_from_slice(×tamp.as_secs().to_be_bytes()); value.push(0xff); value.extend_from_slice( - data.title - .as_ref() - .map(|t| t.as_bytes()) - .unwrap_or_default(), + data.title.as_bytes(), ); value.push(0xff); value.extend_from_slice( @@ -92,10 +89,7 @@ impl service::media::Data for KeyValueDatabase { ); value.push(0xff); value.extend_from_slice( - data.image - .as_ref() - .map(|i| i.as_bytes()) - .unwrap_or_default(), + data.image.as_bytes(), ); value.push(0xff); value.extend_from_slice(&data.image_size.unwrap_or(0).to_be_bytes()); @@ -119,13 +113,10 @@ impl service::media::Data for KeyValueDatabase { Some(0) => None, x => x, }; - let title = match values + let title = values .next() .and_then(|b| String::from_utf8(b.to_vec()).ok()) - { - Some(s) if s.is_empty() => None, - x => x, - }; + .unwrap_or_default(); let description = match values .next() .and_then(|b| String::from_utf8(b.to_vec()).ok()) @@ -133,13 +124,10 @@ impl service::media::Data for KeyValueDatabase { Some(s) if s.is_empty() => None, x => x, }; - let image = match values + let image = values .next() .and_then(|b| String::from_utf8(b.to_vec()).ok()) - { - Some(s) if s.is_empty() => None, - x => x, - }; + .unwrap_or_default(); let image_size = match values .next() .map(|b| usize::from_be_bytes(b.try_into().expect("valid BE array"))) diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index d0d98def..23fb2236 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -28,20 +28,18 @@ pub struct FileMeta { #[derive(Serialize, Default)] pub struct UrlPreviewData { #[serde( - skip_serializing_if = "Option::is_none", rename(serialize = "og:title") )] - pub title: Option, + pub title: String, #[serde( skip_serializing_if = "Option::is_none", rename(serialize = "og:description") )] pub description: Option, #[serde( - skip_serializing_if 
= "Option::is_none", rename(serialize = "og:image") )] - pub image: Option, + pub image: String, #[serde( skip_serializing_if = "Option::is_none", rename(serialize = "matrix:image:size") From 6a4cff16610b2eef69368d253152797929ed9dc2 Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Wed, 30 Oct 2024 21:22:13 -0700 Subject: [PATCH 10/15] Rate limit => NotFound --- src/api/client_server/media.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index f6022c3e..3897d83d 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -308,17 +308,18 @@ pub async fn get_media_preview_route( )); } - if let Ok(preview) = get_url_preview(&url).await { - let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); - return Ok(get_media_preview::v3::Response::from_raw_value(res)); - } - - Err(Error::BadRequest( - ErrorKind::LimitExceeded { - retry_after: Some(RetryAfter::Delay(Duration::from_secs(5))), + match get_url_preview(&url).await { + Ok(preview) => { + let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); + Ok(get_media_preview::v3::Response::from_raw_value(res)) }, - "Retry later", - )) + Err(_) => { + Err(Error::BadRequest( + ErrorKind::NotFound, + "Failed to find preview data", + )) + }, + } } /// # `POST /_matrix/media/r0/upload` From 6789ed336e16a752290e74b43a919335d2f845af Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:34:10 -0700 Subject: [PATCH 11/15] Get rid of allowlist check allocs --- src/api/client_server/media.rs | 50 ++++++++++++++-------------------- src/service/globals/mod.rs | 4 +-- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 3897d83d..0b529851 100644 --- 
a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -242,37 +242,29 @@ async fn get_url_preview(url: &Url) -> Result { } } +/// Verify that the given URL's host is in the allow list. fn url_preview_allowed(url: &Url) -> bool { - const DEFAULT_ALLOWLIST: &[&str] = &[ - "matrix.org", - "mastodon.social", - "youtube.com", - "wikipedia.org", - ]; + // host's existence is already verified in get_media_preview_route, unwrap is safe + let host = url.host_str().unwrap().to_lowercase(); + let host_parts_iter = host + .char_indices() + .filter_map(|(i, c)| { + if i == 0 { + Some(host.as_str()) + } + else if c == '.' { + Some(&host[i+1..]) + } + else { + None + } + }) + .rev().skip(1); // don't match TLDs - let mut host = match url.host_str() { - None => return false, - Some(h) => h.to_lowercase(), - }; - - let allowlist = services().globals.url_preview_allowlist(); - if allowlist.contains(&"*".to_owned()) { - return true; - } - while !host.is_empty() { - if allowlist.contains(&host) { - return true; - } - if allowlist.contains(&"default".to_owned()) && DEFAULT_ALLOWLIST.contains(&host.as_str()) { - return true; - } - /* also check higher level domains, so that e.g. `en.m.wikipedia.org` is matched by `wikipedia.org` on allowlist. 
*/ - host = match host.split_once('.') { - None => return false, - Some((_, domain)) => domain.to_owned(), - } - } - false + let ret = ["*"].into_iter().chain(host_parts_iter).any(|nld| { + services().globals.url_preview_allowlist().any(|a| a == nld) + }); + ret // temp variable to avoid returning from the closure } /// # `GET /_matrix/media/r0/preview_url` diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 134b797b..98c01902 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -324,8 +324,8 @@ impl Service { self.config.allow_federation } - pub fn url_preview_allowlist(&self) -> &Vec { - &self.config.url_preview_allowlist + pub fn url_preview_allowlist(&self) -> impl Iterator { + self.config.url_preview_allowlist.iter().map(|x| x.as_str()) } pub fn allow_room_creation(&self) -> bool { From b76357c80e12f08dcac380459fcbc157a6bb7de1 Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:28:51 -0700 Subject: [PATCH 12/15] More flexible preview config --- src/api/client_server/media.rs | 37 ++++++++---------- src/config/mod.rs | 44 ++++++++++++++++++++-- src/config/proxy.rs | 50 +------------------------ src/config/wild_carded_domain.rs | 64 ++++++++++++++++++++++++++++++++ src/service/globals/mod.rs | 5 ++- 5 files changed, 125 insertions(+), 75 deletions(-) create mode 100644 src/config/wild_carded_domain.rs diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 0b529851..c6d3cb8a 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -3,7 +3,10 @@ use std::time::Duration; -use crate::{service::media::{FileMeta, UrlPreviewData}, services, utils, Error, Result, Ruma}; +use crate::{ + service::media::{FileMeta, UrlPreviewData}, + config::UrlPreviewPermission, + services, utils, Error, Result, Ruma}; use http::header::{CONTENT_DISPOSITION, CONTENT_TYPE}; use ruma::{ api::{ @@ -11,7 +14,7 @@ use ruma::{ 
authenticated_media::{ get_content, get_content_as_filename, get_content_thumbnail, get_media_config, }, - error::{ErrorKind, RetryAfter}, + error::ErrorKind, media::{ self, create_content, get_media_preview, }, @@ -174,6 +177,7 @@ fn url_request_allowed(addr: &IpAddr) -> bool { } } +/// Generate URL preview data from the given URL async fn request_url_preview(url: &Url) -> Result { // resolve host to IP to ensure it's not a local IP (host guaranteed to not be None) let dns_resolver = services().globals.dns_resolver(); @@ -219,6 +223,7 @@ async fn request_url_preview(url: &Url) -> Result { Ok(data) } +/// Retrieve URL preview data from database if available, or generate it async fn get_url_preview(url: &Url) -> Result { if let Some(preview) = services().media.get_url_preview(url.as_str()).await { return Ok(preview); @@ -246,25 +251,15 @@ async fn get_url_preview(url: &Url) -> Result { fn url_preview_allowed(url: &Url) -> bool { // host's existence is already verified in get_media_preview_route, unwrap is safe let host = url.host_str().unwrap().to_lowercase(); - let host_parts_iter = host - .char_indices() - .filter_map(|(i, c)| { - if i == 0 { - Some(host.as_str()) - } - else if c == '.' 
{ - Some(&host[i+1..]) - } - else { - None - } - }) - .rev().skip(1); // don't match TLDs - - let ret = ["*"].into_iter().chain(host_parts_iter).any(|nld| { - services().globals.url_preview_allowlist().any(|a| a == nld) - }); - ret // temp variable to avoid returning from the closure + let preview_config = services().globals.url_previews(); + match preview_config.default { + UrlPreviewPermission::Forbid => { + preview_config.exceptions.iter().any(|ex| ex.matches(&host)) + }, + UrlPreviewPermission::Allow => { + !preview_config.exceptions.iter().any(|ex| ex.matches(&host)) + }, + } } /// # `GET /_matrix/media/r0/preview_url` diff --git a/src/config/mod.rs b/src/config/mod.rs index b8eef648..29d8bc1f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -4,12 +4,14 @@ use std::{ net::{IpAddr, Ipv4Addr}, }; +use wild_carded_domain::WildCardedDomain; use ruma::{OwnedServerName, RoomVersionId}; use serde::{de::IgnoredAny, Deserialize}; use tracing::warn; use url::Url; mod proxy; +mod wild_carded_domain; use self::proxy::ProxyConfig; @@ -85,8 +87,8 @@ pub struct Config { pub emergency_password: Option, - #[serde(default = "Vec::new")] - pub url_preview_allowlist: Vec, + #[serde(default)] + pub url_previews: UrlPreviewConfig, #[serde(flatten)] pub catchall: BTreeMap, @@ -104,6 +106,35 @@ pub struct WellKnownConfig { pub server: Option, } +#[derive(Clone, Debug, Deserialize, Default)] +pub struct UrlPreviewConfig { + pub default: UrlPreviewPermission, + pub exceptions: Vec, +} + +#[derive(Clone, Debug, Deserialize, Default)] +pub enum UrlPreviewPermission { + Allow, + #[default] + Forbid, +} +impl UrlPreviewPermission { + pub fn invert(&self) -> Self { + match self { + UrlPreviewPermission::Allow => UrlPreviewPermission::Forbid, + UrlPreviewPermission::Forbid => UrlPreviewPermission::Allow, + } + } +} +impl fmt::Display for UrlPreviewPermission { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + UrlPreviewPermission::Allow => write!(f, 
"ALLOW"), + UrlPreviewPermission::Forbid => write!(f, "FORBID"), + } + } +} + const DEPRECATED_KEYS: &[&str] = &["cache_capacity"]; impl Config { @@ -235,7 +266,14 @@ impl fmt::Display for Config { }), ("Well-known server name", well_known_server.as_str()), ("Well-known client URL", &self.well_known_client()), - ("URL preview allowlist", &self.url_preview_allowlist.join(", ")), + ("URL preview", { + let mut lst = vec![]; + for exc in &self.url_previews.exceptions { + lst.push(format!("{} {}", self.url_previews.default.invert(), exc)); + } + lst.push(format!("{} {}", self.url_previews.default, "*")); + &lst.join(", ") + }), ]; let mut msg: String = "Active config values:\n\n".to_owned(); diff --git a/src/config/proxy.rs b/src/config/proxy.rs index c03463e7..05762e40 100644 --- a/src/config/proxy.rs +++ b/src/config/proxy.rs @@ -2,6 +2,7 @@ use reqwest::{Proxy, Url}; use serde::Deserialize; use crate::Result; +use super::wild_carded_domain::WildCardedDomain; /// ## Examples: /// - No proxy (default): @@ -92,52 +93,3 @@ impl PartialProxyConfig { } } } - -/// A domain name, that optionally allows a * as its first subdomain. 
-#[derive(Clone, Debug)] -pub enum WildCardedDomain { - WildCard, - WildCarded(String), - Exact(String), -} -impl WildCardedDomain { - pub fn matches(&self, domain: &str) -> bool { - match self { - WildCardedDomain::WildCard => true, - WildCardedDomain::WildCarded(d) => domain.ends_with(d), - WildCardedDomain::Exact(d) => domain == d, - } - } - pub fn more_specific_than(&self, other: &Self) -> bool { - match (self, other) { - (WildCardedDomain::WildCard, WildCardedDomain::WildCard) => false, - (_, WildCardedDomain::WildCard) => true, - (WildCardedDomain::Exact(a), WildCardedDomain::WildCarded(_)) => other.matches(a), - (WildCardedDomain::WildCarded(a), WildCardedDomain::WildCarded(b)) => { - a != b && a.ends_with(b) - } - _ => false, - } - } -} -impl std::str::FromStr for WildCardedDomain { - type Err = std::convert::Infallible; - fn from_str(s: &str) -> Result { - // maybe do some domain validation? - Ok(if s.starts_with("*.") { - WildCardedDomain::WildCarded(s[1..].to_owned()) - } else if s == "*" { - WildCardedDomain::WildCarded("".to_owned()) - } else { - WildCardedDomain::Exact(s.to_owned()) - }) - } -} -impl<'de> Deserialize<'de> for WildCardedDomain { - fn deserialize(deserializer: D) -> Result - where - D: serde::de::Deserializer<'de>, - { - crate::utils::deserialize_from_str(deserializer) - } -} diff --git a/src/config/wild_carded_domain.rs b/src/config/wild_carded_domain.rs new file mode 100644 index 00000000..9452f230 --- /dev/null +++ b/src/config/wild_carded_domain.rs @@ -0,0 +1,64 @@ +use serde::Deserialize; +use std::fmt; + +/// A domain name, that optionally allows a * as its first subdomain. 
+#[derive(Clone, Debug)] +pub enum WildCardedDomain { + WildCard, + WildCarded(String), + Exact(String), +} + +impl WildCardedDomain { + pub fn matches(&self, domain: &str) -> bool { + match self { + WildCardedDomain::WildCard => true, + WildCardedDomain::WildCarded(d) => domain.ends_with(d), + WildCardedDomain::Exact(d) => domain == d, + } + } + pub fn more_specific_than(&self, other: &Self) -> bool { + match (self, other) { + (WildCardedDomain::WildCard, WildCardedDomain::WildCard) => false, + (_, WildCardedDomain::WildCard) => true, + (WildCardedDomain::Exact(a), WildCardedDomain::WildCarded(_)) => other.matches(a), + (WildCardedDomain::WildCarded(a), WildCardedDomain::WildCarded(b)) => { + a != b && a.ends_with(b) + } + _ => false, + } + } +} + +impl std::str::FromStr for WildCardedDomain { + type Err = std::convert::Infallible; + fn from_str(s: &str) -> Result { + // maybe do some domain validation? + Ok(if s.starts_with("*.") { + WildCardedDomain::WildCarded(s[1..].to_lowercase()) + } else if s == "*" { + WildCardedDomain::WildCarded("".to_lowercase()) + } else { + WildCardedDomain::Exact(s.to_lowercase()) + }) + } +} + +impl<'de> Deserialize<'de> for WildCardedDomain { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + crate::utils::deserialize_from_str(deserializer) + } +} + +impl fmt::Display for WildCardedDomain { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + WildCardedDomain::WildCard => write!(f, "*"), + WildCardedDomain::WildCarded(d) => write!(f, "*{d}"), + WildCardedDomain::Exact(d) => write!(f, "{d}"), + } + } +} diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 98c01902..88359afa 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -7,6 +7,7 @@ use ruma::{ use crate::api::server_server::DestinationResponse; +use crate::config::UrlPreviewConfig; use crate::{services, Config, Error, Result}; use futures_util::FutureExt; use 
hickory_resolver::TokioAsyncResolver; @@ -324,8 +325,8 @@ impl Service { self.config.allow_federation } - pub fn url_preview_allowlist(&self) -> impl Iterator { - self.config.url_preview_allowlist.iter().map(|x| x.as_str()) + pub fn url_previews(&self) -> &UrlPreviewConfig { + &self.config.url_previews } pub fn allow_room_creation(&self) -> bool { From 7dd9bda17b92bbb09660c36c3c728fe2cf023945 Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:21:16 -0700 Subject: [PATCH 13/15] Do spamhaus check --- src/api/client_server/media.rs | 29 +++++++++++++++++++++++++---- src/config/mod.rs | 1 + 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index c6d3cb8a..8c80bbae 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -7,6 +7,7 @@ use crate::{ service::media::{FileMeta, UrlPreviewData}, config::UrlPreviewPermission, services, utils, Error, Result, Ruma}; +use hickory_resolver::error::ResolveErrorKind; use http::header::{CONTENT_DISPOSITION, CONTENT_TYPE}; use ruma::{ api::{ @@ -126,7 +127,7 @@ async fn download_html( Ok(data) } -fn url_request_allowed(addr: &IpAddr) -> bool { +fn is_ip_external(addr: &IpAddr) -> bool { // could be implemented with reqwest when it supports IP filtering: // https://github.com/seanmonstar/reqwest/issues/1515 @@ -179,18 +180,38 @@ fn url_request_allowed(addr: &IpAddr) -> bool { /// Generate URL preview data from the given URL async fn request_url_preview(url: &Url) -> Result { - // resolve host to IP to ensure it's not a local IP (host guaranteed to not be None) + // host guaranteed to not be None by get_media_preview_route + let host = url.host_str().unwrap(); + + // resolve host to IP to ensure it's not an internal IP let dns_resolver = services().globals.dns_resolver(); - match dns_resolver.lookup_ip(url.host_str().unwrap()).await { + match 
dns_resolver.lookup_ip(host).await { Err(_) => { return Err(Error::BadServerResponse("Failed to resolve media preview host")); }, - Ok(lookup) if lookup.iter().any(|ip| !url_request_allowed(&ip)) => { + Ok(lookup) if lookup.iter().any(|ip| !is_ip_external(&ip)) => { return Err(Error::BadRequest(ErrorKind::Unknown, "Requesting from this address forbidden")); }, Ok(_) => { }, } + // Spamhaus API is over DNS. Query the API domain, no result = no block + // https://docs.spamhaus.com/datasets/docs/source/70-access-methods/data-query-service/040-dqs-queries.html + if services().globals.url_previews().use_spamhaus_denylist { + let resolver = services().globals.dns_resolver(); + match resolver.lookup_ip(format!("{host}.dbl.spamhaus.org")).await { + Err(e) => { + if let ResolveErrorKind::NoRecordsFound { .. } = e.kind() { } + else { + tracing::log::warn!("Failed to check Spamhaus denylist: {}", e); + } + }, + Ok(_) => { + return Err(Error::BadRequest(ErrorKind::Unknown, "Domain fails reputation check")); + }, + } + } + let client = services().globals.default_client(); let response = client.head(url.as_str()).send().await?; diff --git a/src/config/mod.rs b/src/config/mod.rs index 29d8bc1f..1e9dbccb 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,6 +110,7 @@ pub struct WellKnownConfig { pub struct UrlPreviewConfig { pub default: UrlPreviewPermission, pub exceptions: Vec, + pub use_spamhaus_denylist: bool, } #[derive(Clone, Debug, Deserialize, Default)] From 03653d6a10b74a17ba04b077d7ec5646d79ca34c Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:16:09 -0700 Subject: [PATCH 14/15] Fix domain lookups --- src/api/client_server/media.rs | 4 ++-- src/config/mod.rs | 2 ++ tests/test-config.toml | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 8c80bbae..75a62d9c 100644 --- a/src/api/client_server/media.rs +++ 
b/src/api/client_server/media.rs @@ -185,7 +185,7 @@ async fn request_url_preview(url: &Url) -> Result { // resolve host to IP to ensure it's not an internal IP let dns_resolver = services().globals.dns_resolver(); - match dns_resolver.lookup_ip(host).await { + match dns_resolver.lookup_ip(format!("{host}.")).await { Err(_) => { return Err(Error::BadServerResponse("Failed to resolve media preview host")); }, @@ -199,7 +199,7 @@ async fn request_url_preview(url: &Url) -> Result { // https://docs.spamhaus.com/datasets/docs/source/70-access-methods/data-query-service/040-dqs-queries.html if services().globals.url_previews().use_spamhaus_denylist { let resolver = services().globals.dns_resolver(); - match resolver.lookup_ip(format!("{host}.dbl.spamhaus.org")).await { + match resolver.lookup_ip(format!("{host}.dbl.spamhaus.org.")).await { Err(e) => { if let ResolveErrorKind::NoRecordsFound { .. } = e.kind() { } else { diff --git a/src/config/mod.rs b/src/config/mod.rs index 1e9dbccb..8cd2d422 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -109,7 +109,9 @@ pub struct WellKnownConfig { #[derive(Clone, Debug, Deserialize, Default)] pub struct UrlPreviewConfig { pub default: UrlPreviewPermission, + #[serde(default)] pub exceptions: Vec, + #[serde(default)] pub use_spamhaus_denylist: bool, } diff --git a/tests/test-config.toml b/tests/test-config.toml index 10db1408..c6773a16 100644 --- a/tests/test-config.toml +++ b/tests/test-config.toml @@ -8,6 +8,7 @@ database_path = "/tmp" # All the other settings are left at their defaults: address = "127.0.0.1" +database_backend = "rocksdb" allow_registration = true max_request_size = 20_000_000 port = 6167 From 0e34f07d11633f8077462ee20714765c11b31ae9 Mon Sep 17 00:00:00 2001 From: Steven Vergenz <1882376+stevenvergenz@users.noreply.github.com> Date: Mon, 4 Nov 2024 10:52:43 -0800 Subject: [PATCH 15/15] Fix lint/clippy --- src/api/client_server/media.rs | 113 ++++++++++++++++---------------- src/config/mod.rs | 2 +- 
src/config/proxy.rs | 2 +- src/database/key_value/media.rs | 21 +++--- src/service/globals/mod.rs | 3 +- src/service/media/data.rs | 10 +-- src/service/media/mod.rs | 12 ++-- 7 files changed, 79 insertions(+), 84 deletions(-) diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 75a62d9c..48b4f56a 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -4,9 +4,10 @@ use std::time::Duration; use crate::{ - service::media::{FileMeta, UrlPreviewData}, config::UrlPreviewPermission, - services, utils, Error, Result, Ruma}; + service::media::{FileMeta, UrlPreviewData}, + services, utils, Error, Result, Ruma, +}; use hickory_resolver::error::ResolveErrorKind; use http::header::{CONTENT_DISPOSITION, CONTENT_TYPE}; use ruma::{ @@ -16,9 +17,7 @@ use ruma::{ get_content, get_content_as_filename, get_content_thumbnail, get_media_config, }, error::ErrorKind, - media::{ - self, create_content, get_media_preview, - }, + media::{self, create_content, get_media_preview}, }, federation::authenticated_media::{self as federation_media, FileOrLocation}, }, @@ -27,12 +26,10 @@ use ruma::{ ServerName, UInt, }; -use { - webpage::HTML, - reqwest::Url, - std::{io::Cursor, net::IpAddr, sync::Arc}, - image::io::Reader as ImgReader, -}; +use image::io::Reader as ImgReader; +use reqwest::Url; +use std::{io::Cursor, net::IpAddr, sync::Arc}; +use webpage::HTML; const MXC_LENGTH: usize = 32; @@ -58,17 +55,15 @@ pub async fn get_media_config_auth_route( }) } -async fn download_image( - client: &reqwest::Client, - url: &str, -) -> Result { +async fn download_image(client: &reqwest::Client, url: &str) -> Result { let image = client.get(url).send().await?.bytes().await?; let mxc = format!( "mxc://{}/{}", services().globals.server_name(), utils::random_string(MXC_LENGTH) ); - services().media + services() + .media .create(mxc.clone(), None, None, &image) .await?; @@ -89,10 +84,7 @@ async fn download_image( }) } -async fn download_html( - client: 
&reqwest::Client, - url: &str, -) -> Result { +async fn download_html(client: &reqwest::Client, url: &str) -> Result { let max_download_size = 300_000; let mut response = client.get(url).send().await?; @@ -122,7 +114,11 @@ async fn download_html( let props = html.opengraph.properties; /* use OpenGraph title/description, but fall back to HTML if not available */ - data.title = props.get("title").cloned().or(html.title).unwrap_or(String::from(url)); + data.title = props + .get("title") + .cloned() + .or(html.title) + .unwrap_or(String::from(url)); data.description = props.get("description").cloned().or(html.description); Ok(data) } @@ -169,7 +165,7 @@ fn is_ip_external(addr: &IpAddr) -> bool { // AS112-v6 (`2001:4:112::/48`) || matches!(ip6.segments(), [0x2001, 4, 0x112, _, _, _, _, _]) // ORCHIDv2 (`2001:20::/28`) - || matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if b >= 0x20 && b <= 0x2F) + || matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x2f).contains(&b)) )) || ((ip6.segments()[0] == 0x2001) && (ip6.segments()[1] == 0xdb8)) // is_documentation() || ((ip6.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local() @@ -187,28 +183,39 @@ async fn request_url_preview(url: &Url) -> Result { let dns_resolver = services().globals.dns_resolver(); match dns_resolver.lookup_ip(format!("{host}.")).await { Err(_) => { - return Err(Error::BadServerResponse("Failed to resolve media preview host")); - }, + return Err(Error::BadServerResponse( + "Failed to resolve media preview host", + )); + } Ok(lookup) if lookup.iter().any(|ip| !is_ip_external(&ip)) => { - return Err(Error::BadRequest(ErrorKind::Unknown, "Requesting from this address forbidden")); - }, - Ok(_) => { }, + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Requesting from this address forbidden", + )); + } + Ok(_) => {} } // Spamhaus API is over DNS. 
Query the API domain, no result = no block // https://docs.spamhaus.com/datasets/docs/source/70-access-methods/data-query-service/040-dqs-queries.html if services().globals.url_previews().use_spamhaus_denylist { let resolver = services().globals.dns_resolver(); - match resolver.lookup_ip(format!("{host}.dbl.spamhaus.org.")).await { + match resolver + .lookup_ip(format!("{host}.dbl.spamhaus.org.")) + .await + { Err(e) => { - if let ResolveErrorKind::NoRecordsFound { .. } = e.kind() { } - else { + if let ResolveErrorKind::NoRecordsFound { .. } = e.kind() { + } else { tracing::log::warn!("Failed to check Spamhaus denylist: {}", e); } - }, + } Ok(_) => { - return Err(Error::BadRequest(ErrorKind::Unknown, "Domain fails reputation check")); - }, + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Domain fails reputation check", + )); + } } } @@ -239,7 +246,10 @@ async fn request_url_preview(url: &Url) -> Result { } }; - services().media.set_url_preview(url.as_str(), &data).await?; + services() + .media + .set_url_preview(url.as_str(), &data) + .await?; Ok(data) } @@ -264,7 +274,7 @@ async fn get_url_preview(url: &Url) -> Result { match services().media.get_url_preview(url.as_str()).await { Some(preview) => Ok(preview), - None => request_url_preview(url).await + None => request_url_preview(url).await, } } @@ -276,10 +286,10 @@ fn url_preview_allowed(url: &Url) -> bool { match preview_config.default { UrlPreviewPermission::Forbid => { preview_config.exceptions.iter().any(|ex| ex.matches(&host)) - }, + } UrlPreviewPermission::Allow => { !preview_config.exceptions.iter().any(|ex| ex.matches(&host)) - }, + } } } @@ -291,21 +301,14 @@ pub async fn get_media_preview_route( ) -> Result { let url = match Url::parse(&body.url) { Err(_) => { + return Err(Error::BadRequest(ErrorKind::Unknown, "Not a valid URL")); + } + Ok(u) if u.scheme() != "http" && u.scheme() != "https" || u.host().is_none() => { return Err(Error::BadRequest( - ErrorKind::Unknown, - "Not a valid URL", - )); 
- }, - Ok(u) - if u.scheme() != "http" - && u.scheme() != "https" - || u.host().is_none() - => { - return Err(Error::BadRequest( - ErrorKind::Unknown, + ErrorKind::Unknown, "Not a valid HTTP URL", )); - }, + } Ok(url) => url, }; @@ -320,13 +323,11 @@ pub async fn get_media_preview_route( Ok(preview) => { let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); Ok(get_media_preview::v3::Response::from_raw_value(res)) - }, - Err(_) => { - Err(Error::BadRequest( - ErrorKind::NotFound, - "Failed to find preview data", - )) - }, + } + Err(_) => Err(Error::BadRequest( + ErrorKind::NotFound, + "Failed to find preview data", + )), } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8cd2d422..20c9c241 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -4,11 +4,11 @@ use std::{ net::{IpAddr, Ipv4Addr}, }; -use wild_carded_domain::WildCardedDomain; use ruma::{OwnedServerName, RoomVersionId}; use serde::{de::IgnoredAny, Deserialize}; use tracing::warn; use url::Url; +use wild_carded_domain::WildCardedDomain; mod proxy; mod wild_carded_domain; diff --git a/src/config/proxy.rs b/src/config/proxy.rs index 05762e40..0b604647 100644 --- a/src/config/proxy.rs +++ b/src/config/proxy.rs @@ -1,8 +1,8 @@ use reqwest::{Proxy, Url}; use serde::Deserialize; -use crate::Result; use super::wild_carded_domain::WildCardedDomain; +use crate::Result; /// ## Examples: /// - No proxy (default): diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 165eebb3..f229633d 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,6 +1,10 @@ use ruma::{api::client::error::ErrorKind, http_headers::ContentDisposition}; -use crate::{database::KeyValueDatabase, service::{self, media::UrlPreviewData}, utils, Error, Result}; +use crate::{ + database::KeyValueDatabase, + service::{self, media::UrlPreviewData}, + utils, Error, Result, +}; impl service::media::Data for KeyValueDatabase { fn 
create_file_metadata( @@ -73,13 +77,16 @@ impl service::media::Data for KeyValueDatabase { self.url_previews.remove(url.as_bytes()) } - fn set_url_preview(&self, url: &str, data: &UrlPreviewData, timestamp: std::time::Duration) -> Result<()> { + fn set_url_preview( + &self, + url: &str, + data: &UrlPreviewData, + timestamp: std::time::Duration, + ) -> Result<()> { let mut value = Vec::<u8>::new(); value.extend_from_slice(&timestamp.as_secs().to_be_bytes()); value.push(0xff); - value.extend_from_slice( - data.title.as_bytes(), - ); + value.extend_from_slice(data.title.as_bytes()); value.push(0xff); value.extend_from_slice( data.description @@ -88,9 +95,7 @@ impl service::media::Data for KeyValueDatabase { .unwrap_or_default(), ); value.push(0xff); - value.extend_from_slice( - data.image.as_bytes(), - ); + value.extend_from_slice(data.image.as_bytes()); value.push(0xff); value.extend_from_slice(&data.image_size.unwrap_or(0).to_be_bytes()); value.push(0xff); diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 88359afa..3048d076 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -7,8 +7,7 @@ use ruma::{ use crate::api::server_server::DestinationResponse; -use crate::config::UrlPreviewConfig; -use crate::{services, Config, Error, Result}; +use crate::{config::UrlPreviewConfig, services, Config, Error, Result}; use futures_util::FutureExt; use hickory_resolver::TokioAsyncResolver; use hyper_util::client::legacy::connect::dns::{GaiResolver, Name as HyperName}; diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 1d0f87ab..97da0a26 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -20,10 +20,7 @@ pub trait Data: Send + Sync { height: u32, ) -> Result<(ContentDisposition, Option, Vec)>; - fn remove_url_preview( - &self, - url: &str - ) -> Result<()>; + fn remove_url_preview(&self, url: &str) -> Result<()>; fn set_url_preview( &self, @@ -32,8 +29,5 @@ pub trait Data: Send + Sync { timestamp: 
std::time::Duration, ) -> Result<()>; - fn get_url_preview( - &self, - url: &str - ) -> Option; + fn get_url_preview(&self, url: &str) -> Option; } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 23fb2236..33d64446 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -1,7 +1,7 @@ mod data; use std::{ - io::Cursor, collections::HashMap, + io::Cursor, sync::{Arc, RwLock}, time::SystemTime, }; @@ -12,12 +12,12 @@ use ruma::http_headers::{ContentDisposition, ContentDispositionType}; use crate::{services, Result}; use image::imageops::FilterType; +use serde::Serialize; use tokio::{ fs::File, io::{AsyncReadExt, AsyncWriteExt, BufReader}, sync::Mutex, }; -use serde::Serialize; pub struct FileMeta { pub content_disposition: ContentDisposition, @@ -27,18 +27,14 @@ pub struct FileMeta { #[derive(Serialize, Default)] pub struct UrlPreviewData { - #[serde( - rename(serialize = "og:title") - )] + #[serde(rename(serialize = "og:title"))] pub title: String, #[serde( skip_serializing_if = "Option::is_none", rename(serialize = "og:description") )] pub description: Option, - #[serde( - rename(serialize = "og:image") - )] + #[serde(rename(serialize = "og:image"))] pub image: String, #[serde( skip_serializing_if = "Option::is_none",