1
0
Fork 0
mirror of https://gitlab.com/famedly/conduit.git synced 2025-07-02 16:38:36 +00:00

Preview URLs

Closes: #14
This commit is contained in:
Reiner Herrmann 2023-07-28 17:53:15 +02:00
parent dc0fa09a57
commit bb4cade9fd
12 changed files with 460 additions and 3 deletions

View file

@ -2,13 +2,22 @@ use std::time::Duration;
use crate::{service::media::FileMeta, services, utils, Error, Result, Ruma};
use ruma::api::client::{
error::ErrorKind,
error::{ErrorKind, RetryAfter},
media::{
create_content, get_content, get_content_as_filename, get_content_thumbnail,
get_media_config,
get_media_config, get_media_preview
},
};
#[cfg(feature = "url_preview")]
use {
crate::service::media::UrlPreviewData,
webpage::HTML,
std::{io::Cursor, net::IpAddr, sync::Arc, time::Duration},
tokio::sync::Notify,
image::io::Reader as ImgReader,
};
/// Length passed to `utils::random_string` when generating the media ID part
/// of a new `mxc://` URI.
const MXC_LENGTH: usize = 32;
/// # `GET /_matrix/media/r0/config`
@ -22,6 +31,230 @@ pub async fn get_media_config_route(
})
}
/// Downloads the image at `url`, stores it as local media and returns preview
/// data describing it.
///
/// The returned [`UrlPreviewData`] carries the freshly generated `mxc://` URI,
/// the size of the downloaded body in bytes and, when the image format can be
/// recognised, its width and height. All other fields keep their defaults.
///
/// # Errors
///
/// Returns an error if the HTTP request fails, if the responding peer address
/// is rejected by `url_request_allowed`, or if storing the media fails.
#[cfg(feature = "url_preview")]
async fn download_image(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
    let response = client.get(url).send().await?;

    // `url` may originate from untrusted page metadata (`og:image`), so the
    // peer that actually answered must be validated before its body is read
    // and persisted; otherwise internal resources could be copied into media.
    if !response
        .remote_addr()
        .map_or(false, |a| url_request_allowed(&a.ip()))
    {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Requesting from this address forbidden",
        ));
    }

    let image = response.bytes().await?;

    let mxc = format!(
        "mxc://{}/{}",
        services().globals.server_name(),
        utils::random_string(MXC_LENGTH)
    );
    services()
        .media
        .create(mxc.clone(), None, None, &image)
        .await?;

    // Dimensions are best-effort: an undecodable body still yields a preview,
    // just without width/height.
    let dimensions = ImgReader::new(Cursor::new(&image))
        .with_guessed_format()
        .ok()
        .and_then(|reader| reader.into_dimensions().ok());
    let (width, height) = match dimensions {
        Some((width, height)) => (Some(width), Some(height)),
        None => (None, None),
    };

    Ok(UrlPreviewData {
        image: Some(mxc),
        image_size: Some(image.len()),
        image_width: width,
        image_height: height,
        ..Default::default()
    })
}
/// Downloads the HTML document at `url` and builds preview data from it.
///
/// At most roughly `max_download_size` bytes are buffered (the final chunk may
/// overshoot the limit). If the page advertises an OpenGraph image, the first
/// one is fetched via `download_image`. Title and description are taken from
/// the OpenGraph properties, falling back to the plain HTML values.
///
/// # Errors
///
/// Returns an error if the HTTP request fails, if the responding peer address
/// is rejected by `url_request_allowed`, if the body cannot be parsed as HTML,
/// or if downloading the referenced image fails.
#[cfg(feature = "url_preview")]
async fn download_html(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
    let max_download_size = 300_000;

    let mut response = client.get(url).send().await?;

    // This GET is a separate request from any earlier probe of the same URL,
    // so redirects or DNS changes may have led somewhere else. Validate the
    // peer that actually answered before consuming its body.
    if !response
        .remote_addr()
        .map_or(false, |a| url_request_allowed(&a.ip()))
    {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Requesting from this address forbidden",
        ));
    }

    let mut bytes: Vec<u8> = Vec::new();
    while let Some(chunk) = response.chunk().await? {
        bytes.extend_from_slice(&chunk);
        if bytes.len() > max_download_size {
            break;
        }
    }

    // Lossy decoding keeps going on invalid UTF-8 (e.g. a body cut mid-character).
    let body = String::from_utf8_lossy(&bytes).into_owned();
    let html = HTML::from_string(body, Some(url.to_owned()))
        .map_err(|_| Error::BadRequest(ErrorKind::Unknown, "Failed to parse HTML"))?;

    let mut data = match html.opengraph.images.first() {
        None => UrlPreviewData::default(),
        Some(obj) => download_image(client, &obj.url).await?,
    };

    let props = html.opengraph.properties;

    /* use OpenGraph title/description, but fall back to HTML if not available */
    data.title = props.get("title").cloned().or(html.title);
    data.description = props.get("description").cloned().or(html.description);

    Ok(data)
}
#[cfg(feature = "url_preview")]
/// Returns `true` if outgoing preview requests to `addr` are permitted.
///
/// Rejects addresses that are not publicly routable: for IPv4 the private,
/// loopback, link-local, multicast, broadcast, documentation and unspecified
/// ranges; for IPv6 loopback, multicast, unspecified, unique-local
/// (`fc00::/7`) and link-local (`fe80::/10`) addresses. IPv4-mapped IPv6
/// addresses (`::ffff:a.b.c.d`) are judged by the IPv4 rules so they cannot
/// be used to sneak past the filter.
fn url_request_allowed(addr: &IpAddr) -> bool {
    // could be implemented with reqwest when it supports IP filtering:
    // https://github.com/seanmonstar/reqwest/issues/1515

    // TODO: simplify to .is_global() when it has been stabilized
    fn ipv4_allowed(ip4: &std::net::Ipv4Addr) -> bool {
        !(ip4.is_private()
            || ip4.is_loopback()
            || ip4.is_link_local()
            || ip4.is_multicast()
            || ip4.is_broadcast()
            || ip4.is_documentation()
            || ip4.is_unspecified())
    }

    match addr {
        IpAddr::V4(ip4) => ipv4_allowed(ip4),
        IpAddr::V6(ip6) => match ip6.to_ipv4_mapped() {
            // `::ffff:127.0.0.1` and friends reach the same host as the
            // embedded IPv4 address, so apply the IPv4 policy to them.
            Some(mapped) => ipv4_allowed(&mapped),
            None => {
                let first_segment = ip6.segments()[0];
                // fc00::/7 — unique local addresses (private networks)
                let unique_local = first_segment & 0xfe00 == 0xfc00;
                // fe80::/10 — link-local unicast
                let link_local = first_segment & 0xffc0 == 0xfe80;

                !(ip6.is_loopback()
                    || ip6.is_multicast()
                    || ip6.is_unspecified()
                    || unique_local
                    || link_local)
            }
        },
    }
}
/// Fetches a fresh preview for `url` and stores it via `set_url_preview`.
///
/// A `HEAD` request is issued first; its peer address must pass
/// `url_request_allowed` and its `Content-Type` header decides whether the
/// target is handled as an HTML page or as an image.
///
/// # Errors
///
/// Returns an error if the request fails, the peer address is forbidden, the
/// content type is missing or unsupported, or downloading/storing fails.
#[cfg(feature = "url_preview")]
async fn request_url_preview(url: String) -> Result<UrlPreviewData> {
    let client = services().globals.default_client();
    let head = client.head(&url).send().await?;

    // A response without a known peer address is treated as forbidden.
    let peer_allowed = match head.remote_addr() {
        Some(peer) => url_request_allowed(&peer.ip()),
        None => false,
    };
    if !peer_allowed {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Requesting from this address forbidden",
        ));
    }

    let content_type = head
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .ok_or(Error::BadRequest(
            ErrorKind::Unknown,
            "Unknown Content-Type",
        ))?;

    // Prefix matching tolerates parameters such as `; charset=utf-8`.
    let preview = if content_type.starts_with("text/html") {
        download_html(&client, &url).await?
    } else if content_type.starts_with("image/") {
        download_image(&client, &url).await?
    } else {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Unsupported Content-Type",
        ));
    };

    services().media.set_url_preview(&url, &preview).await?;

    Ok(preview)
}
/// Returns the preview for `url`, fetching it if it is not stored yet.
///
/// Concurrent callers asking for the same URL are coalesced: the first one
/// performs the fetch while the others wait for it to finish and then read
/// the stored result.
///
/// # Errors
///
/// The fetching caller returns whatever `request_url_preview` returns. A
/// waiting caller returns a `BadRequest` ("No Preview available") if no
/// preview is stored once the fetch has finished.
#[cfg(feature = "url_preview")]
async fn get_url_preview(url: String) -> Result<UrlPreviewData> {
    /// Unregisters the in-flight request and wakes all waiters when dropped.
    ///
    /// Doing this in `Drop` guarantees cleanup even when the fetching future
    /// is cancelled mid-request; otherwise the entry would stay registered and
    /// every later caller for this URL would wait forever.
    struct InFlightRequest {
        url: String,
        notifier: Arc<Notify>,
    }

    impl Drop for InFlightRequest {
        fn drop(&mut self) {
            // Remove the entry *before* notifying: a waiter that still sees
            // the entry is then guaranteed that the wake-up has not been sent.
            // A poisoned lock is skipped rather than panicking inside `drop`.
            if let Ok(mut pending) = services().media.url_preview_requests.write() {
                pending.remove(&self.url);
            }
            self.notifier.notify_waiters();
        }
    }

    if let Some(preview) = services().media.get_url_preview(&url).await {
        return Ok(preview);
    }

    // Look up and register under a single write lock so that two callers can
    // never both conclude that they are the one who has to fetch.
    let (notifier, is_fetcher) = {
        let mut pending = services().media.url_preview_requests.write().unwrap();
        match pending.get(&url).cloned() {
            Some(existing) => (existing, false),
            None => {
                let notifier = Arc::new(Notify::new());
                pending.insert(url.clone(), notifier.clone());
                (notifier, true)
            }
        }
    };

    if is_fetcher {
        let _in_flight = InFlightRequest {
            url: url.clone(),
            notifier,
        };
        // `_in_flight` is dropped after the request completed (or was
        // cancelled), which unregisters the entry and wakes the waiters.
        return request_url_preview(url).await;
    }

    // Create the `Notified` future first: from this point on it receives
    // `notify_waiters()` wake-ups even though it has not been polled yet.
    let notified = notifier.notified();

    // Only wait if the very same request is still registered. If it is gone,
    // the fetcher already finished and the wake-up may have been sent before
    // `notified` existed, so waiting could block forever.
    let still_in_flight = services()
        .media
        .url_preview_requests
        .read()
        .unwrap()
        .get(&url)
        .map_or(false, |current| Arc::ptr_eq(current, &notifier));
    if still_in_flight {
        // wait until being notified that request is finished
        notified.await;
    }

    services()
        .media
        .get_url_preview(&url)
        .await
        .ok_or(Error::BadRequest(
            ErrorKind::Unknown,
            "No Preview available",
        ))
}
/// # `GET /_matrix/media/r0/preview_url`
///
/// Returns the URL preview for the requested URL.
///
/// Responds with a `BadRequest` when previews are disabled in the server
/// configuration. Any failure while obtaining the preview is reported as
/// `LimitExceeded` with a retry delay of five seconds.
#[cfg(feature = "url_preview")]
pub async fn get_media_preview_route(
    body: Ruma<get_media_preview::v3::Request>,
) -> Result<get_media_preview::v3::Response> {
    let previews_enabled = services().globals.allow_url_preview();
    if !previews_enabled {
        return Err(Error::BadRequest(
            ErrorKind::Unknown,
            "Previewing URL not allowed",
        ));
    }

    match get_url_preview(body.url.clone()).await {
        Ok(preview) => {
            let raw =
                serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed");
            Ok(get_media_preview::v3::Response::from_raw_value(raw))
        }
        // The concrete error is intentionally not exposed; the client is
        // simply asked to try again later.
        Err(_) => Err(Error::BadRequest(
            ErrorKind::LimitExceeded {
                retry_after: Some(RetryAfter::Delay(Duration::from_secs(5))),
            },
            "Retry later",
        )),
    }
}
#[cfg(not(feature = "url_preview"))]
pub async fn get_media_preview_route(
_body: Ruma<get_media_preview::v3::Request>,
) -> Result<get_media_preview::v3::Response> {
Err(Error::BadRequest(
ErrorKind::Forbidden,
"URL preview not implemented",
))
}
/// # `POST /_matrix/media/r0/upload`
///
/// Permanently save media in the server.