1
0
Fork 0
mirror of https://gitlab.com/famedly/conduit.git synced 2025-06-27 16:35:59 +00:00

Preview URLs

Closes: #14
This commit is contained in:
Reiner Herrmann 2023-07-28 17:53:15 +02:00
parent dc0fa09a57
commit bb4cade9fd
12 changed files with 460 additions and 3 deletions

View file

@ -147,6 +147,8 @@ tikv-jemallocator = { version = "0.5.0", features = [
sd-notify = { version = "0.4.1", optional = true }
webpage = { version = "1.6", default-features = false, optional = true }
# Used for matrix spec type definitions and helpers
[dependencies.ruma]
features = [
@ -186,6 +188,7 @@ conduit_bin = ["axum"]
jemalloc = ["tikv-jemallocator"]
sqlite = ["parking_lot", "rusqlite", "tokio/signal"]
systemd = ["sd-notify"]
url_preview = ["webpage"]
[[bin]]
name = "conduit"

View file

@ -47,6 +47,9 @@ registration_token = ""
allow_check_for_updates = true
allow_federation = true
# Allows clients to request URL previews (only effective when Conduit is built with the "url_preview" feature)
allow_url_preview = false
# Enable the display name lightning bolt on registration.
enable_lightning_bolt = true

3
debian/postinst vendored
View file

@ -84,6 +84,9 @@ allow_check_for_updates = true
# Enable the display name lightning bolt on registration.
enable_lightning_bolt = true
# Allows clients to request URL previews (only effective when Conduit is built with the "url_preview" feature)
allow_url_preview = false
# Servers listed here will be used to gather public keys of other servers.
# Generally, copying this exactly should be enough. (Currently, Conduit doesn't
# support batched key requests, so this list should only contain Synapse

View file

@ -2,13 +2,22 @@ use std::time::Duration;
use crate::{service::media::FileMeta, services, utils, Error, Result, Ruma};
use ruma::api::client::{
error::ErrorKind,
error::{ErrorKind, RetryAfter},
media::{
create_content, get_content, get_content_as_filename, get_content_thumbnail,
get_media_config,
get_media_config, get_media_preview
},
};
#[cfg(feature = "url_preview")]
use {
crate::service::media::UrlPreviewData,
webpage::HTML,
std::{io::Cursor, net::IpAddr, sync::Arc, time::Duration},
tokio::sync::Notify,
image::io::Reader as ImgReader,
};
const MXC_LENGTH: usize = 32;
/// # `GET /_matrix/media/r0/config`
@ -22,6 +31,230 @@ pub async fn get_media_config_route(
})
}
/// Fetches the image at `url`, stores it as local media and describes it as preview data.
///
/// The complete response body is read into memory and saved under a freshly generated
/// `mxc://` URI on this server. Width and height are only filled in when the image format
/// can be guessed and its dimensions decoded; otherwise both stay `None`. All fields not
/// related to the image keep their default values.
///
/// # Errors
///
/// Propagates failures of the HTTP request and of storing the media.
#[cfg(feature = "url_preview")]
async fn download_image(
    client: &reqwest::Client,
    url: &str,
) -> Result<UrlPreviewData> {
    let payload = client.get(url).send().await?.bytes().await?;

    let mxc_uri = format!(
        "mxc://{}/{}",
        services().globals.server_name(),
        utils::random_string(MXC_LENGTH)
    );
    services()
        .media
        .create(mxc_uri.clone(), None, None, &payload)
        .await?;

    // Any failure while sniffing the format or decoding the header simply means
    // "dimensions unknown"; it must not fail the whole preview.
    let dimensions = ImgReader::new(Cursor::new(&payload))
        .with_guessed_format()
        .ok()
        .and_then(|reader| reader.into_dimensions().ok());

    Ok(UrlPreviewData {
        image: Some(mxc_uri),
        image_size: Some(payload.len()),
        image_width: dimensions.map(|(width, _)| width),
        image_height: dimensions.map(|(_, height)| height),
        ..Default::default()
    })
}
/// Downloads the HTML document at `url` and extracts preview data from it.
///
/// The body is read chunk by chunk and reading stops once more than 300 000 bytes have
/// been collected, so the last chunk may push the buffer slightly over that limit. The
/// bytes are decoded lossily as UTF-8 before parsing. If the page declares an OpenGraph
/// image, the first one is fetched via `download_image`. Title and description come from
/// the OpenGraph properties, falling back to the plain HTML values.
///
/// # Errors
///
/// Propagates HTTP and image download failures; returns `BadRequest` with
/// "Failed to parse HTML" when the document cannot be parsed.
#[cfg(feature = "url_preview")]
async fn download_html(
    client: &reqwest::Client,
    url: &str,
) -> Result<UrlPreviewData> {
    let max_download_size = 300_000;

    let mut response = client.get(url).send().await?;
    let mut buffer: Vec<u8> = Vec::new();
    while let Some(chunk) = response.chunk().await? {
        buffer.extend_from_slice(&chunk);
        if buffer.len() > max_download_size {
            break;
        }
    }

    let document = HTML::from_string(
        String::from_utf8_lossy(&buffer).into_owned(),
        Some(url.to_owned()),
    )
    .map_err(|_| Error::BadRequest(ErrorKind::Unknown, "Failed to parse HTML"))?;

    let mut preview = if let Some(og_image) = document.opengraph.images.first() {
        download_image(client, &og_image.url).await?
    } else {
        UrlPreviewData::default()
    };

    /* use OpenGraph title/description, but fall back to HTML if not available */
    let og_properties = document.opengraph.properties;
    preview.title = og_properties.get("title").cloned().or(document.title);
    preview.description = og_properties
        .get("description")
        .cloned()
        .or(document.description);

    Ok(preview)
}
#[cfg(feature = "url_preview")]
/// Decides whether a URL preview may be fetched from the peer address `addr`.
///
/// Returns `false` for every address that is not expected to be a public host, so that
/// clients cannot use the preview endpoint to reach services on the server's own machine
/// or internal network:
///
/// * IPv4: private, loopback, link-local, multicast, broadcast, documentation,
///   unspecified and the shared address space `100.64.0.0/10`.
/// * IPv6: loopback, multicast, unspecified, unique-local `fc00::/7`, link-local
///   `fe80::/10` and documentation `2001:db8::/32`.
/// * IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d`) are judged by the IPv4 rules of the
///   embedded address.
fn url_request_allowed(addr: &IpAddr) -> bool {
    // could be implemented with reqwest when it supports IP filtering:
    // https://github.com/seanmonstar/reqwest/issues/1515
    // TODO: simplify to .is_global() when it has been stabilized

    /// IPv4 rules, shared by plain IPv4 and IPv4-mapped IPv6 addresses.
    fn ipv4_allowed(ip4: &std::net::Ipv4Addr) -> bool {
        let [first, second, ..] = ip4.octets();
        // 100.64.0.0/10 (carrier-grade NAT) has no stable std predicate.
        let is_shared = first == 100 && (second & 0xc0) == 0x40;

        !(ip4.is_private()
            || ip4.is_loopback()
            || ip4.is_link_local()
            || ip4.is_multicast()
            || ip4.is_broadcast()
            || ip4.is_documentation()
            || ip4.is_unspecified()
            || is_shared)
    }

    match addr {
        IpAddr::V4(ip4) => ipv4_allowed(ip4),
        IpAddr::V6(ip6) => {
            // `::ffff:127.0.0.1` and friends reach the embedded IPv4 host, so they
            // must not slip past the IPv4 checks.
            if let Some(mapped) = ip6.to_ipv4_mapped() {
                return ipv4_allowed(&mapped);
            }

            let [first, second, ..] = ip6.segments();
            let is_unique_local = (first & 0xfe00) == 0xfc00; // fc00::/7
            let is_link_local = (first & 0xffc0) == 0xfe80; // fe80::/10
            let is_documentation = first == 0x2001 && second == 0x0db8; // 2001:db8::/32

            !(ip6.is_loopback()
                || ip6.is_multicast()
                || ip6.is_unspecified()
                || is_unique_local
                || is_link_local
                || is_documentation)
        }
    }
}
/// Builds a fresh preview for `url` by contacting the remote server and caches the result.
///
/// A `HEAD` request is sent first. Its peer address must pass `url_request_allowed` and
/// its `Content-Type` header selects the handler: `text/html*` goes to `download_html`,
/// `image/*` goes to `download_image`. The resulting data is stored via
/// `services().media.set_url_preview` before being returned.
///
/// NOTE(review): the address check runs after the `HEAD` response has been received, and
/// the download helpers issue their own requests afterwards.
///
/// # Errors
///
/// Returns `BadRequest` when the peer address is unknown or forbidden, when the
/// `Content-Type` header is missing, not valid text, or unsupported; propagates HTTP,
/// download and storage failures.
#[cfg(feature = "url_preview")]
async fn request_url_preview(url: String) -> Result<UrlPreviewData> {
    let client = services().globals.default_client();
    let head = client.head(&url).send().await?;

    // A response without a known peer address is treated like a forbidden one.
    let peer_allowed = match head.remote_addr() {
        Some(peer) => url_request_allowed(&peer.ip()),
        None => false,
    };
    if !peer_allowed {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Requesting from this address forbidden",
        ));
    }

    let content_type = head
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .ok_or(Error::BadRequest(
            ErrorKind::Unknown,
            "Unknown Content-Type",
        ))?;

    let preview = if content_type.starts_with("text/html") {
        download_html(&client, &url).await?
    } else if content_type.starts_with("image/") {
        download_image(&client, &url).await?
    } else {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Unsupported Content-Type",
        ));
    };

    services().media.set_url_preview(&url, &preview).await?;

    Ok(preview)
}
/// Returns the preview for `url`, from the cache if possible, otherwise by requesting it.
///
/// Concurrent calls for the same URL are coalesced: the first caller (the "leader")
/// registers a `Notify` in `url_preview_requests` and performs the request, all other
/// callers wait for the leader and then read the cached result.
///
/// The coordination is written so that waiters cannot get stuck:
///
/// * lookup and registration happen under one write lock, so there is exactly one leader;
/// * the registration is removed and waiters are woken by a drop guard, which also runs
///   when the leader's future is dropped before completion (e.g. client disconnect);
/// * a waiter arms its `notified()` future first and then re-checks that the same
///   registration is still present, so a leader finishing in between is not missed.
///
/// # Errors
///
/// The leader returns whatever `request_url_preview` returns. A waiter returns
/// `BadRequest` with "No Preview available" if the leader did not produce a cached entry.
#[cfg(feature = "url_preview")]
async fn get_url_preview(url: String) -> Result<UrlPreviewData> {
    /// Unregisters the in-flight request and wakes all waiters when dropped.
    struct InFlightGuard<'a> {
        url: &'a str,
        notifier: Arc<Notify>,
    }

    impl Drop for InFlightGuard<'_> {
        fn drop(&mut self) {
            // Remove before notifying: a waiter that still sees the entry is then
            // guaranteed to have armed its `notified()` future before this wakeup.
            // A poisoned lock is ignored here so that dropping never panics.
            if let Ok(mut requests) = services().media.url_preview_requests.write() {
                requests.remove(self.url);
            }
            self.notifier.notify_waiters();
        }
    }

    if let Some(preview) = services().media.get_url_preview(&url).await {
        return Ok(preview);
    }

    // Atomically either join the running request or become the leader.
    let (notifier, is_leader) = {
        let mut requests = services().media.url_preview_requests.write().unwrap();
        match requests.get(&url) {
            Some(existing) => (Arc::clone(existing), false),
            None => {
                let notifier = Arc::new(Notify::new());
                requests.insert(url.clone(), Arc::clone(&notifier));
                (notifier, true)
            }
        }
    };

    if is_leader {
        let _in_flight = InFlightGuard {
            url: &url,
            notifier,
        };
        return request_url_preview(url.clone()).await;
    }

    // Arm the wakeup first, then verify the leader has not finished in the meantime.
    let notified = notifier.notified();
    let leader_still_running = services()
        .media
        .url_preview_requests
        .read()
        .unwrap()
        .get(&url)
        .map_or(false, |current| Arc::ptr_eq(current, &notifier));
    if leader_still_running {
        // wait until being notified that request is finished
        notified.await;
    }

    services()
        .media
        .get_url_preview(&url)
        .await
        .ok_or(Error::BadRequest(
            ErrorKind::Unknown,
            "No Preview available",
        ))
}
/// # `GET /_matrix/media/r0/preview_url`
///
/// Returns URL preview.
///
/// - Fails with "Previewing URL not allowed" when `allow_url_preview` is disabled
/// - Any failure to obtain the preview is reported as rate limiting with a retry delay
///   of 5 seconds
#[cfg(feature = "url_preview")]
pub async fn get_media_preview_route(
    body: Ruma<get_media_preview::v3::Request>,
) -> Result<get_media_preview::v3::Response> {
    if !services().globals.allow_url_preview() {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Previewing URL not allowed",
        ));
    }

    match get_url_preview(body.url.clone()).await {
        Ok(preview) => {
            let raw =
                serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed");
            Ok(get_media_preview::v3::Response::from_raw_value(raw))
        }
        Err(_) => Err(Error::BadRequest(
            ErrorKind::LimitExceeded {
                retry_after: Some(RetryAfter::Delay(Duration::from_secs(5))),
            },
            "Retry later",
        )),
    }
}
#[cfg(not(feature = "url_preview"))]
pub async fn get_media_preview_route(
_body: Ruma<get_media_preview::v3::Request>,
) -> Result<get_media_preview::v3::Response> {
Err(Error::BadRequest(
ErrorKind::Forbidden,
"URL preview not implemented",
))
}
/// # `POST /_matrix/media/r0/upload`
///
/// Permanently save media in the server.

View file

@ -53,6 +53,8 @@ pub struct Config {
pub allow_encryption: bool,
#[serde(default = "false_fn")]
pub allow_federation: bool,
#[serde(default = "false_fn")]
pub allow_url_preview: bool,
#[serde(default = "true_fn")]
pub allow_room_creation: bool,
#[serde(default = "true_fn")]
@ -184,6 +186,7 @@ impl fmt::Display for Config {
),
("Allow encryption", &self.allow_encryption.to_string()),
("Allow federation", &self.allow_federation.to_string()),
("Allow URL preview", &self.allow_url_preview.to_string()),
("Allow room creation", &self.allow_room_creation.to_string()),
(
"JWT secret",

View file

@ -2,6 +2,9 @@ use ruma::api::client::error::ErrorKind;
use crate::{database::KeyValueDatabase, service, utils, Error, Result};
#[cfg(feature = "url_preview")]
use crate::service::media::UrlPreviewData;
impl service::media::Data for KeyValueDatabase {
fn create_file_metadata(
&self,
@ -79,4 +82,110 @@ impl service::media::Data for KeyValueDatabase {
};
Ok((content_disposition, content_type, key))
}
/// Deletes the cached preview stored under `url` from the `url_previews` tree.
///
/// The URL's raw bytes are used as the key; the result of the tree's `remove` is
/// returned unchanged.
#[cfg(feature = "url_preview")]
fn remove_url_preview(&self, url: &str) -> Result<()> {
self.url_previews.remove(url.as_bytes())
}
/// Serializes `data` and stores it in the `url_previews` tree under the bytes of `url`.
///
/// Value layout, fields joined by a single `0xff` byte:
///
/// 1. `timestamp` in whole seconds, big-endian `u64`
/// 2. title, 3. description, 4. image URI (UTF-8, empty when `None`)
/// 5. image size, big-endian `usize` (`0` when `None`)
/// 6. image width, 7. image height, big-endian `u32` (`0` when `None`)
///
/// The numeric fields are fixed-width and may themselves contain `0xff` bytes.
#[cfg(feature = "url_preview")]
fn set_url_preview(&self, url: &str, data: &UrlPreviewData, timestamp: std::time::Duration) -> Result<()> {
    let mut record = Vec::<u8>::new();
    record.extend_from_slice(&timestamp.as_secs().to_be_bytes());

    // The three text fields share the same encoding: separator, then the raw bytes.
    for text in [&data.title, &data.description, &data.image] {
        record.push(0xff);
        record.extend_from_slice(text.as_deref().unwrap_or_default().as_bytes());
    }

    record.push(0xff);
    record.extend_from_slice(&data.image_size.unwrap_or(0).to_be_bytes());
    record.push(0xff);
    record.extend_from_slice(&data.image_width.unwrap_or(0).to_be_bytes());
    record.push(0xff);
    record.extend_from_slice(&data.image_height.unwrap_or(0).to_be_bytes());

    self.url_previews.insert(url.as_bytes(), &record)
}
/// Loads and decodes the cached preview stored under `url`, if there is one.
///
/// The stored value is the sequence written by `set_url_preview`: an 8-byte big-endian
/// timestamp, three UTF-8 strings (title, description, image URI), a big-endian `usize`
/// image size and two big-endian `u32` values (width, height), each pair of fields
/// separated by one `0xff` byte.
///
/// The record is decoded positionally rather than by splitting on `0xff`: the integer
/// fields are fixed-width and can legitimately contain `0xff` bytes (for example a width
/// of 255), whereas valid UTF-8 text never does. Empty strings and zero numbers decode to
/// `None`, as does a text field that is not valid UTF-8.
///
/// Returns `None` when the lookup fails, no entry exists, or the record is malformed.
#[cfg(feature = "url_preview")]
fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> {
    const SEPARATOR: u8 = 0xff;
    const USIZE_LEN: usize = std::mem::size_of::<usize>();

    /// Splits `len` bytes off the front of `input`, or returns `None` if it is too short.
    fn take<'a>(input: &mut &'a [u8], len: usize) -> Option<&'a [u8]> {
        let current: &'a [u8] = *input;
        if current.len() < len {
            return None;
        }
        let (head, tail) = current.split_at(len);
        *input = tail;
        Some(head)
    }

    /// Consumes exactly one separator byte.
    fn skip_separator(input: &mut &[u8]) -> Option<()> {
        match take(input, 1)? {
            [SEPARATOR] => Some(()),
            _ => None,
        }
    }

    /// Consumes a text field including its trailing separator.
    /// The outer `Option` signals a malformed record, the inner one an absent value.
    fn take_text(input: &mut &[u8]) -> Option<Option<String>> {
        let len = input.iter().position(|&byte| byte == SEPARATOR)?;
        let raw = take(input, len)?;
        skip_separator(input)?;
        Some(
            String::from_utf8(raw.to_vec())
                .ok()
                .filter(|text| !text.is_empty()),
        )
    }

    let stored = self.url_previews.get(url.as_bytes()).ok()??;
    let mut rest: &[u8] = &stored;

    // The timestamp is part of the record but not needed by the caller yet.
    take(&mut rest, 8)?;
    skip_separator(&mut rest)?;

    let title = take_text(&mut rest)?;
    let description = take_text(&mut rest)?;
    let image = take_text(&mut rest)?;

    let size_bytes: [u8; USIZE_LEN] = take(&mut rest, USIZE_LEN)?.try_into().ok()?;
    skip_separator(&mut rest)?;
    let width_bytes: [u8; 4] = take(&mut rest, 4)?.try_into().ok()?;
    skip_separator(&mut rest)?;
    let height_bytes: [u8; 4] = take(&mut rest, 4)?.try_into().ok()?;

    // Zero is how `set_url_preview` encodes "not available".
    let image_size = Some(usize::from_be_bytes(size_bytes)).filter(|&size| size != 0);
    let image_width = Some(u32::from_be_bytes(width_bytes)).filter(|&width| width != 0);
    let image_height = Some(u32::from_be_bytes(height_bytes)).filter(|&height| height != 0);

    Some(UrlPreviewData {
        title,
        description,
        image,
        image_size,
        image_width,
        image_height,
    })
}
}

View file

@ -146,6 +146,8 @@ pub struct KeyValueDatabase {
//pub media: media::Media,
pub(super) mediaid_file: Arc<dyn KvTree>, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType
#[cfg(feature = "url_preview")]
pub(super) url_previews: Arc<dyn KvTree>,
//pub key_backups: key_backups::KeyBackups,
pub(super) backupid_algorithm: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
pub(super) backupid_etag: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
@ -362,6 +364,8 @@ impl KeyValueDatabase {
roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?,
roomusertype_roomuserdataid: builder.open_tree("roomusertype_roomuserdataid")?,
mediaid_file: builder.open_tree("mediaid_file")?,
#[cfg(feature = "url_preview")]
url_previews: builder.open_tree("url_previews")?,
backupid_algorithm: builder.open_tree("backupid_algorithm")?,
backupid_etag: builder.open_tree("backupid_etag")?,
backupkeyid_backup: builder.open_tree("backupkeyid_backup")?,

View file

@ -379,6 +379,7 @@ fn routes(config: &Config) -> Router {
.ruma_route(client_server::turn_server_route)
.ruma_route(client_server::send_event_to_device_route)
.ruma_route(client_server::get_media_config_route)
.ruma_route(client_server::get_media_preview_route)
.ruma_route(client_server::create_content_route)
.ruma_route(client_server::get_content_route)
.ruma_route(client_server::get_content_as_filename_route)

View file

@ -324,6 +324,10 @@ impl Service {
self.config.allow_federation
}
/// Returns the value of the `allow_url_preview` configuration option.
pub fn allow_url_preview(&self) -> bool {
self.config.allow_url_preview
}
/// Returns the value of the `allow_room_creation` configuration option.
pub fn allow_room_creation(&self) -> bool {
self.config.allow_room_creation
}

View file

@ -17,4 +17,24 @@ pub trait Data: Send + Sync {
width: u32,
height: u32,
) -> Result<(Option<String>, Option<String>, Vec<u8>)>;
/// Removes the cached preview for `url` from the backing store.
#[cfg(feature = "url_preview")]
fn remove_url_preview(
&self,
url: &str
) -> Result<()>;
/// Stores `data` as the cached preview for `url`.
///
/// `timestamp` is passed as a duration; the media service supplies the time elapsed
/// since the Unix epoch at the moment of storing.
#[cfg(feature = "url_preview")]
fn set_url_preview(
&self,
url: &str,
data: &super::UrlPreviewData,
timestamp: std::time::Duration,
) -> Result<()>;
/// Returns the cached preview for `url`, or `None` when nothing usable is stored.
#[cfg(feature = "url_preview")]
fn get_url_preview(
&self,
url: &str
) -> Option<super::UrlPreviewData>;
}

View file

@ -11,14 +11,62 @@ use tokio::{
io::{AsyncReadExt, AsyncWriteExt, BufReader},
};
#[cfg(feature = "url_preview")]
use {
std::{
collections::HashMap,
sync::{Arc, RwLock},
},
serde::Serialize,
std::time::SystemTime,
tokio::sync::Notify,
};
/// A media file together with its optional HTTP metadata.
// NOTE(review): field meanings below are inferred from the names; confirm against the
// code that builds `FileMeta`, which is not visible here.
pub struct FileMeta {
// presumably the `Content-Disposition` value to serve with the file
pub content_disposition: Option<String>,
// presumably the `Content-Type` value to serve with the file
pub content_type: Option<String>,
// presumably the raw file contents
pub file: Vec<u8>,
}
/// Preview information for a URL, as cached in the database and returned to clients.
///
/// Only serialization is derived. Each field is renamed to the key shown in its
/// attribute, and fields that are `None` are left out of the serialized output.
/// `Default` yields a value with every field unset.
#[cfg(feature = "url_preview")]
#[derive(Serialize, Default)]
pub struct UrlPreviewData {
// Page title; serialized as "og:title".
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:title")
)]
pub title: Option<String>,
// Page description; serialized as "og:description".
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:description")
)]
pub description: Option<String>,
// `mxc://` URI of the locally stored preview image; serialized as "og:image".
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:image")
)]
pub image: Option<String>,
// Size of the downloaded image in bytes; serialized as "matrix:image:size".
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "matrix:image:size")
)]
pub image_size: Option<usize>,
// Decoded image width; serialized as "og:image:width".
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:image:width")
)]
pub image_width: Option<u32>,
// Decoded image height; serialized as "og:image:height".
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:image:height")
)]
pub image_height: Option<u32>,
}
/// Media service: access to the media data store plus URL preview bookkeeping.
pub struct Service {
// Backing store implementing the media `Data` trait.
pub db: &'static dyn Data,
// URL previews currently being requested, keyed by URL. Callers asking for a URL that
// is already in flight wait on the stored `Notify` instead of issuing a second request.
#[cfg(feature = "url_preview")]
pub url_preview_requests: RwLock<HashMap<String, Arc<Notify>>>,
}
impl Service {
@ -225,4 +273,23 @@ impl Service {
Ok(None)
}
}
/// Returns the cached preview for `url` from the data store, if any.
#[cfg(feature = "url_preview")]
pub async fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> {
self.db.get_url_preview(url)
}
/// Removes the cached preview for `url` from the data store.
///
/// Only the preview entry is deleted; an image downloaded for it is left in place.
#[cfg(feature = "url_preview")]
pub async fn remove_url_preview(&self, url: &str) -> Result<()> {
// TODO: also remove the downloaded image
self.db.remove_url_preview(url)
}
/// Caches `data` as the preview for `url`, stamped with the current time.
///
/// The timestamp handed to the data store is the time elapsed since the Unix epoch.
///
/// # Panics
///
/// Panics with "valid system time" if the system clock is set before the Unix epoch.
#[cfg(feature = "url_preview")]
pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> {
    let since_epoch = SystemTime::UNIX_EPOCH
        .elapsed()
        .expect("valid system time");

    self.db.set_url_preview(url, data, since_epoch)
}
}

View file

@ -3,6 +3,9 @@ use std::{
sync::{Arc, Mutex as StdMutex},
};
#[cfg(feature = "url_preview")]
use std::sync::RwLock;
use lru_cache::LruCache;
use tokio::sync::{broadcast, Mutex};
@ -118,7 +121,11 @@ impl Services {
account_data: account_data::Service { db },
admin: admin::Service::build(),
key_backups: key_backups::Service { db },
media: media::Service { db },
media: media::Service {
db,
#[cfg(feature = "url_preview")]
url_preview_requests: RwLock::new(HashMap::new())
},
sending: sending::Service::build(db, &config),
globals: globals::Service::load(db, config)?,