From 7b9ba62b6711fef41e253272ff23303f60d4b065 Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Sun, 16 Mar 2025 01:16:25 +0000 Subject: [PATCH 01/15] correct path on federated media endpoint doc comment --- src/api/server_server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/server_server.rs b/src/api/server_server.rs index b6a038e5..30481fe6 100644 --- a/src/api/server_server.rs +++ b/src/api/server_server.rs @@ -2215,7 +2215,7 @@ pub async fn create_invite_route( }) } -/// # `GET /_matrix/federation/v1/media/download/{serverName}/{mediaId}` +/// # `GET /_matrix/federation/v1/media/download/{mediaId}` /// /// Load media from our server. pub async fn get_content_route( @@ -2246,7 +2246,7 @@ pub async fn get_content_route( } } -/// # `GET /_matrix/federation/v1/media/thumbnail/{serverName}/{mediaId}` +/// # `GET /_matrix/federation/v1/media/thumbnail/{mediaId}` /// /// Load media thumbnail from our server or over federation. pub async fn get_content_thumbnail_route( From 937521fcf1f4edb44900e37fa38bd2008f749d4f Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Sun, 23 Mar 2025 12:26:28 +0000 Subject: [PATCH 02/15] refactor: allow for post-processing on the config --- src/config/mod.rs | 237 ++++++++++++++++++++++++++++--------- src/service/globals/mod.rs | 21 ++-- 2 files changed, 190 insertions(+), 68 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 7ed875ed..296fe288 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -13,8 +13,8 @@ mod proxy; use self::proxy::ProxyConfig; -#[derive(Clone, Debug, Deserialize)] -pub struct Config { +#[derive(Deserialize)] +pub struct IncompleteConfig { #[serde(default = "default_address")] pub address: IpAddr, #[serde(default = "default_port")] @@ -60,7 +60,7 @@ pub struct Config { #[serde(default = "default_default_room_version")] pub default_room_version: RoomVersionId, #[serde(default)] - pub well_known: WellKnownConfig, + pub well_known: IncompleteWellKnownConfig, #[serde(default = "false_fn")] pub allow_jaeger: bool, #[serde(default = "false_fn")] @@ -87,6 +87,168 @@ pub struct Config { pub catchall: BTreeMap, } +#[derive(Deserialize, Clone, Debug)] +#[serde(from = "IncompleteConfig")] +pub struct Config { + pub address: IpAddr, + pub port: u16, + pub tls: Option, + + pub server_name: OwnedServerName, + pub database_backend: String, + pub database_path: String, + pub db_cache_capacity_mb: f64, + pub enable_lightning_bolt: bool, + pub allow_check_for_updates: bool, + pub conduit_cache_capacity_modifier: f64, + pub rocksdb_max_open_files: i32, + pub pdu_cache_capacity: u32, + pub cleanup_second_interval: u32, + pub max_request_size: u32, + pub max_concurrent_requests: u16, + pub max_fetch_prev_events: u16, + pub allow_registration: bool, + pub registration_token: Option, + pub openid_token_ttl: u64, + pub allow_encryption: bool, + pub allow_federation: bool, + pub allow_room_creation: bool, + pub allow_unstable_room_versions: bool, + pub default_room_version: RoomVersionId, + pub well_known: WellKnownConfig, + pub allow_jaeger: bool, + pub tracing_flame: bool, + pub proxy: ProxyConfig, + pub jwt_secret: Option, + pub trusted_servers: Vec, + pub log: String, + + pub turn: Option, + + pub emergency_password: Option, + + pub catchall: BTreeMap, +} + +impl From for Config { + fn from(val: IncompleteConfig) -> Self { + let IncompleteConfig { + address, + port, + tls, + server_name, + database_backend, + database_path, + db_cache_capacity_mb, + enable_lightning_bolt, + 
allow_check_for_updates, + conduit_cache_capacity_modifier, + rocksdb_max_open_files, + pdu_cache_capacity, + cleanup_second_interval, + max_request_size, + max_concurrent_requests, + max_fetch_prev_events, + allow_registration, + registration_token, + openid_token_ttl, + allow_encryption, + allow_federation, + allow_room_creation, + allow_unstable_room_versions, + default_room_version, + well_known, + allow_jaeger, + tracing_flame, + proxy, + jwt_secret, + trusted_servers, + log, + turn_username, + turn_password, + turn_uris, + turn_secret, + turn_ttl, + turn, + emergency_password, + catchall, + } = val; + + let turn = turn.or_else(|| { + let auth = if let Some(secret) = turn_secret { + TurnAuth::Secret { secret } + } else if let (Some(username), Some(password)) = (turn_username, turn_password) { + TurnAuth::UserPass { username, password } + } else { + return None; + }; + + if let (Some(uris), ttl) = (turn_uris, turn_ttl) { + Some(TurnConfig { uris, ttl, auth }) + } else { + None + } + }); + + let well_known_client = well_known + .client + .map(String::from) + .unwrap_or_else(|| format!("https://{server_name}")); + + let well_known_server = well_known.server.unwrap_or_else(|| { + if server_name.port().is_some() { + server_name.clone() + } else { + format!("{}:443", server_name.host()) + .try_into() + .expect("Host from valid hostname + :443 must be valid") + } + }); + + let well_known = WellKnownConfig { + client: well_known_client, + server: well_known_server, + }; + + Config { + address, + port, + tls, + server_name, + database_backend, + database_path, + db_cache_capacity_mb, + enable_lightning_bolt, + allow_check_for_updates, + conduit_cache_capacity_modifier, + rocksdb_max_open_files, + pdu_cache_capacity, + cleanup_second_interval, + max_request_size, + max_concurrent_requests, + max_fetch_prev_events, + allow_registration, + registration_token, + openid_token_ttl, + allow_encryption, + allow_federation, + allow_room_creation, + allow_unstable_room_versions, + default_room_version, + well_known, + allow_jaeger, + tracing_flame, + proxy, + jwt_secret, + trusted_servers, + log, + turn, + emergency_password, + catchall, + } + } +} + #[derive(Clone, Debug, Deserialize)] pub struct TlsConfig { pub certs: String, @@ -110,11 +272,20 @@ pub enum TurnAuth { } #[derive(Clone, Debug, Deserialize, Default)] -pub struct WellKnownConfig { +pub struct IncompleteWellKnownConfig { + // We use URL here so that the user gets an error if the config isn't a valid url pub client: Option, pub server: Option, } +#[derive(Clone, Debug)] +pub struct WellKnownConfig { + // We use String here as there is no point converting our manually constructed String into a + // URL, just for it to be converted back into a &str + pub client: String, + pub server: OwnedServerName, +} + const DEPRECATED_KEYS: &[&str] = &[ "cache_capacity", "turn_username", @@ -142,61 +313,9 @@ impl Config { } } -impl Config { - pub fn well_known_client(&self) -> String { - if let Some(url) = &self.well_known.client { - url.to_string() - } else { - format!("https://{}", self.server_name) - } - } - - pub fn well_known_server(&self) -> OwnedServerName { - match &self.well_known.server { - Some(server_name) => server_name.to_owned(), - None => { - if self.server_name.port().is_some() { - self.server_name.to_owned() - } else { - format!("{}:443", self.server_name.host()) - .try_into() - .expect("Host from valid hostname + :443 must be valid") - } - } - } - } - - pub fn turn(&self) -> Option { - if self.turn.is_some() { - self.turn.clone() - } 
else if let Some(uris) = self.turn_uris.clone() {
- if let Some(secret) = self.turn_secret.clone() {
- Some(TurnConfig {
- uris,
- ttl: self.turn_ttl,
- auth: TurnAuth::Secret { secret },
- })
- } else if let (Some(username), Some(password)) =
- (self.turn_username.clone(), self.turn_password.clone())
- {
- Some(TurnConfig {
- uris,
- ttl: self.turn_ttl,
- auth: TurnAuth::UserPass { username, password },
- })
- } else {
- None
- }
- } else {
- None
- }
- }
-}
-
 impl fmt::Display for Config {
 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 // Prepare a list of config values to show
- let well_known_server = self.well_known_server();
 let lines = [
 ("Server name", self.server_name.host()),
 ("Database backend", &self.database_backend),
@@ -247,7 +366,7 @@ impl fmt::Display for Config {
 &lst.join(", ")
 }),
 ("TURN URIs", {
- if let Some(turn) = self.turn() {
+ if let Some(turn) = &self.turn {
 let mut lst = vec![];
 for item in turn.uris.iter().cloned().enumerate() {
 let (_, uri): (usize, String) = item;
@@ -258,8 +377,8 @@ impl fmt::Display for Config {
 "unset"
 }
 }),
- ("Well-known server name", well_known_server.as_str()),
- ("Well-known client URL", &self.well_known_client()),
+ ("Well-known server name", self.well_known.server.as_str()),
+ ("Well-known client URL", &self.well_known.client),
 ];
 
 let mut msg: String = "Active config values:\n\n".to_owned();
diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs
index 8296b39a..4f401ad3 100644
--- a/src/service/globals/mod.rs
+++ b/src/service/globals/mod.rs
@@ -349,7 +349,18 @@ impl Service {
 }
 
 pub fn turn(&self) -> Option<TurnConfig> {
- self.config.turn()
+ // We have to clone basically the entire TurnConfig on every `/turnServers` request
+ self.config.turn.clone()
+ }
+
+ pub fn well_known_server(&self) -> OwnedServerName {
+ // Same as above, but for /.well-known/matrix/server
+ self.config.well_known.server.clone()
+ }
+
+ pub fn well_known_client(&self) -> String {
+ // Same as above, but for /.well-known/matrix/client
+ self.config.well_known.client.clone()
 }
 
 pub fn dns_resolver(&self) -> &TokioAsyncResolver {
@@ -481,14 +492,6 @@ impl Service {
 r
 }
 
- pub fn well_known_server(&self) -> OwnedServerName {
- self.config.well_known_server()
- }
-
- pub fn well_known_client(&self) -> String {
- self.config.well_known_client()
- }
-
 pub fn shutdown(&self) {
 self.shutdown.store(true, atomic::Ordering::Relaxed);
 // On shutdown
From 70d7f77363d4d79f05021f3dea423a931a646822 Mon Sep 17 00:00:00 2001
From: Matthias Ahouansou
Date: Sun, 16 Mar 2025 17:40:55 +0000
Subject: [PATCH 03/15] feat(media): use file's sha256 for on-disk name & make directory configurable

In addition, metadata about the file, such as creation time, last access, and file size, is stored in the database
---
 Cargo.lock | 8 +
 Cargo.toml | 3 +
 docs/configuration.md | 20 ++
 src/api/client_server/media.rs | 64 +++---
 src/api/server_server.rs | 24 +-
 src/config/mod.rs | 40 ++++
 src/database/key_value/media.rs | 224 +++++++++++++++----
 src/database/key_value/mod.rs | 2 +-
 src/database/mod.rs | 280 +++++++++++++++++++++--
 src/main.rs | 2 +-
 src/service/globals/mod.rs | 28 +--
 src/service/media/data.rs | 41 +++-
 src/service/media/mod.rs | 383 ++++++++++++++++++++------------
 src/utils/mod.rs | 7 +
 14 files changed, 840 insertions(+), 286 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5895889b..cf5e656d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -499,6 +499,7 @@ dependencies = [
 "directories",
 "figment",
 "futures-util",
+ "hex",
 "hickory-resolver",
 "hmac",
 "http 1.1.0",
@@
-528,6 +529,7 @@ dependencies = [ "serde_json", "serde_yaml", "sha-1", + "sha2", "thiserror 1.0.61", "thread_local", "threadpool", @@ -1045,6 +1047,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "hickory-proto" version = "0.24.1" diff --git a/Cargo.toml b/Cargo.toml index 407a5e5b..f120b014 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,9 @@ image = { version = "0.25", default-features = false, features = [ "jpeg", "png", ] } +# Used for creating media filenames +hex = "0.4" +sha2 = "0.10" # Used to encode server public key base64 = "0.22" # Used when hashing the state diff --git a/docs/configuration.md b/docs/configuration.md index 9687ead1..ffbfa512 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -57,9 +57,29 @@ The `global` section contains the following fields: | `turn_uris` | `array` | The TURN URIs | `[]` | | `turn_secret` | `string` | The TURN secret | `""` | | `turn_ttl` | `integer` | The TURN TTL in seconds | `86400` | +| `media` | `table` | See the [media configuration](#media) | See the [media configuration](#media) | | `emergency_password` | `string` | Set a password to login as the `conduit` user in case of emergency | N/A | | `well_known` | `table` | Used for [delegation](delegation.md) | See [delegation](delegation.md) | +### Media +The `media` table is used to configure how media is stored and where. Currently, there is only one available +backend, that being `filesystem`. The backend can be set using the `backend` field. Example: +``` +[global.media] +backend = "filesystem" # the default backend +``` + +#### Filesystem backend +The filesystem backend has the following fields: +- `path`: The base directory where all the media files will be stored (defaults to + `${database_path}/media`) + +##### Example: +``` +[global.media] +backend = "filesystem" +path = "/srv/matrix-media" +``` ### TLS The `tls` table contains the following fields: diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 9b7cf22e..29e4592f 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -54,33 +54,33 @@ pub async fn get_media_config_auth_route( pub async fn create_content_route( body: Ruma, ) -> Result { - let mxc = format!( - "mxc://{}/{}", - services().globals.server_name(), - utils::random_string(MXC_LENGTH) - ); + let create_content::v3::Request { + filename, + content_type, + file, + .. 
+ } = body.body; + + let media_id = utils::random_string(MXC_LENGTH); services() .media .create( - mxc.clone(), - Some( - ContentDisposition::new(ContentDispositionType::Inline) - .with_filename(body.filename.clone()), - ), - body.content_type.as_deref(), - &body.file, + services().globals.server_name(), + &media_id, + filename.as_deref(), + content_type.as_deref(), + &file, ) .await?; Ok(create_content::v3::Response { - content_uri: mxc.into(), + content_uri: (format!("mxc://{}/{}", services().globals.server_name(), media_id)).into(), blurhash: None, }) } pub async fn get_remote_content( - mxc: &str, server_name: &ServerName, media_id: String, ) -> Result { @@ -120,7 +120,7 @@ pub async fn get_remote_content( server_name, media::get_content::v3::Request { server_name: server_name.to_owned(), - media_id, + media_id: media_id.clone(), timeout_ms: Duration::from_secs(20), allow_remote: false, allow_redirect: true, @@ -140,8 +140,12 @@ pub async fn get_remote_content( services() .media .create( - mxc.to_owned(), - content_response.content_disposition.clone(), + server_name, + &media_id, + content_response + .content_disposition + .as_ref() + .and_then(|cd| cd.filename.as_deref()), content_response.content_type.as_deref(), &content_response.file, ) @@ -186,13 +190,11 @@ async fn get_content( media_id: String, allow_remote: bool, ) -> Result { - let mxc = format!("mxc://{}/{}", server_name, media_id); - if let Ok(Some(FileMeta { content_disposition, content_type, file, - })) = services().media.get(mxc.clone()).await + })) = services().media.get(server_name, &media_id).await { Ok(get_content::v1::Response { file, @@ -200,8 +202,7 @@ async fn get_content( content_disposition: Some(content_disposition), }) } else if server_name != services().globals.server_name() && allow_remote { - let remote_content_response = - get_remote_content(&mxc, server_name, media_id.clone()).await?; + let remote_content_response = get_remote_content(server_name, media_id.clone()).await?; Ok(get_content::v1::Response { content_disposition: remote_content_response.content_disposition, @@ -262,11 +263,9 @@ async fn get_content_as_filename( filename: String, allow_remote: bool, ) -> Result { - let mxc = format!("mxc://{}/{}", server_name, media_id); - if let Ok(Some(FileMeta { file, content_type, .. 
- })) = services().media.get(mxc.clone()).await + })) = services().media.get(server_name, &media_id).await { Ok(get_content_as_filename::v1::Response { file, @@ -277,8 +276,7 @@ async fn get_content_as_filename( ), }) } else if server_name != services().globals.server_name() && allow_remote { - let remote_content_response = - get_remote_content(&mxc, server_name, media_id.clone()).await?; + let remote_content_response = get_remote_content(server_name, media_id.clone()).await?; Ok(get_content_as_filename::v1::Response { content_disposition: Some( @@ -351,8 +349,6 @@ async fn get_content_thumbnail( animated: Option, allow_remote: bool, ) -> Result { - let mxc = format!("mxc://{}/{}", server_name, media_id); - if let Some(FileMeta { file, content_type, @@ -360,7 +356,8 @@ async fn get_content_thumbnail( }) = services() .media .get_thumbnail( - mxc.clone(), + server_name, + &media_id, width .try_into() .map_err(|_| Error::BadRequest(ErrorKind::InvalidParam, "Width is invalid."))?, @@ -452,7 +449,12 @@ async fn get_content_thumbnail( services() .media .upload_thumbnail( - mxc, + server_name, + &media_id, + thumbnail_response + .content_disposition + .as_ref() + .and_then(|cd| cd.filename.as_deref()), thumbnail_response.content_type.as_deref(), width.try_into().expect("all UInts are valid u32s"), height.try_into().expect("all UInts are valid u32s"), diff --git a/src/api/server_server.rs b/src/api/server_server.rs index 30481fe6..01ec47d3 100644 --- a/src/api/server_server.rs +++ b/src/api/server_server.rs @@ -2221,17 +2221,14 @@ pub async fn create_invite_route( pub async fn get_content_route( body: Ruma, ) -> Result { - let mxc = format!( - "mxc://{}/{}", - services().globals.server_name(), - body.media_id - ); - if let Some(FileMeta { content_disposition, content_type, file, - }) = services().media.get(mxc.clone()).await? + }) = services() + .media + .get(services().globals.server_name(), &body.media_id) + .await? 
{ Ok(get_content::v1::Response::new( ContentMetadata::new(), @@ -2252,12 +2249,6 @@ pub async fn get_content_route( pub async fn get_content_thumbnail_route( body: Ruma, ) -> Result { - let mxc = format!( - "mxc://{}/{}", - services().globals.server_name(), - body.media_id - ); - let Some(FileMeta { file, content_type, @@ -2265,7 +2256,8 @@ pub async fn get_content_thumbnail_route( }) = services() .media .get_thumbnail( - mxc.clone(), + services().globals.server_name(), + &body.media_id, body.width .try_into() .map_err(|_| Error::BadRequest(ErrorKind::InvalidParam, "Width is invalid."))?, @@ -2281,7 +2273,9 @@ pub async fn get_content_thumbnail_route( services() .media .upload_thumbnail( - mxc, + services().globals.server_name(), + &body.media_id, + content_disposition.filename.as_deref(), content_type.as_deref(), body.width.try_into().expect("all UInts are valid u32s"), body.height.try_into().expect("all UInts are valid u32s"), diff --git a/src/config/mod.rs b/src/config/mod.rs index 296fe288..370dcfec 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2,6 +2,7 @@ use std::{ collections::BTreeMap, fmt, net::{IpAddr, Ipv4Addr}, + path::PathBuf, }; use ruma::{OwnedServerName, RoomVersionId}; @@ -81,6 +82,9 @@ pub struct IncompleteConfig { pub turn: Option, + #[serde(default)] + pub media: IncompleteMediaConfig, + pub emergency_password: Option, #[serde(flatten)] @@ -125,6 +129,8 @@ pub struct Config { pub turn: Option, + pub media: MediaConfig, + pub emergency_password: Option, pub catchall: BTreeMap, @@ -170,6 +176,7 @@ impl From for Config { turn_secret, turn_ttl, turn, + media, emergency_password, catchall, } = val; @@ -210,6 +217,21 @@ impl From for Config { server: well_known_server, }; + let media = match media { + IncompleteMediaConfig::FileSystem { path } => MediaConfig::FileSystem { + path: path.unwrap_or_else(|| { + // We do this as we don't know if the path has a trailing slash, or even if the + // path separator is a forward or backward slash + [&database_path, "media"] + .iter() + .collect::() + .into_os_string() + .into_string() + .expect("Both inputs are valid UTF-8") + }), + }, + }; + Config { address, port, @@ -243,6 +265,7 @@ impl From for Config { trusted_servers, log, turn, + media, emergency_password, catchall, } @@ -286,6 +309,23 @@ pub struct WellKnownConfig { pub server: OwnedServerName, } +#[derive(Clone, Debug, Deserialize)] +#[serde(tag = "backend", rename_all = "lowercase")] +pub enum IncompleteMediaConfig { + FileSystem { path: Option }, +} + +impl Default for IncompleteMediaConfig { + fn default() -> Self { + Self::FileSystem { path: None } + } +} + +#[derive(Debug, Clone)] +pub enum MediaConfig { + FileSystem { path: String }, +} + const DEPRECATED_KEYS: &[&str] = &[ "cache_capacity", "turn_username", diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 99df0097..26232b8b 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,71 +1,199 @@ -use ruma::{api::client::error::ErrorKind, http_headers::ContentDisposition}; +use ruma::{api::client::error::ErrorKind, ServerName}; +use sha2::{digest::Output, Sha256}; +use tracing::error; -use crate::{database::KeyValueDatabase, service, utils, Error, Result}; +use crate::{ + database::KeyValueDatabase, + service::{self, media::DbFileMeta}, + utils, Error, Result, +}; impl service::media::Data for KeyValueDatabase { fn create_file_metadata( &self, - mxc: String, + sha256_digest: Output, + file_size: u64, + servername: &ServerName, + media_id: &str, 
+ filename: Option<&str>, + content_type: Option<&str>, + ) -> Result<()> { + let metadata = FilehashMetadata::new(file_size); + + self.filehash_metadata + .insert(&sha256_digest, metadata.value())?; + + let mut key = sha256_digest.to_vec(); + key.extend_from_slice(servername.as_bytes()); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + self.filehash_servername_mediaid.insert(&key, &[])?; + + let mut key = servername.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + let mut value = sha256_digest.to_vec(); + value.extend_from_slice(filename.map(|f| f.as_bytes()).unwrap_or_default()); + value.push(0xff); + value.extend_from_slice(content_type.map(|f| f.as_bytes()).unwrap_or_default()); + + self.servernamemediaid_metadata.insert(&key, &value) + } + + fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result { + let mut key = servername.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + let value = self + .servernamemediaid_metadata + .get(&key)? + .ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found."))?; + + let metadata = parse_metadata(&value).inspect_err(|e| { + error!("Error parsing metadata for \"mxc://{servername}/{media_id}\" from servernamemediaid_metadata: {e}"); + })?; + + // Only assume file is available if there is metadata about the filehash itself + self.filehash_metadata + .get(&metadata.sha256_digest)? + .map(|_| metadata) + .ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found.")) + } + + fn create_thumbnail_metadata( + &self, + sha256_digest: Output, + file_size: u64, + servername: &ServerName, + media_id: &str, width: u32, height: u32, - content_disposition: &ContentDisposition, + filename: Option<&str>, content_type: Option<&str>, - ) -> Result> { - let mut key = mxc.as_bytes().to_vec(); + ) -> Result<()> { + let metadata = FilehashMetadata::new(file_size); + + self.filehash_metadata + .insert(&sha256_digest, metadata.value())?; + + let mut key = sha256_digest.to_vec(); + key.extend_from_slice(servername.as_bytes()); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); key.push(0xff); key.extend_from_slice(&width.to_be_bytes()); key.extend_from_slice(&height.to_be_bytes()); - key.push(0xff); - key.extend_from_slice(content_disposition.to_string().as_bytes()); - key.push(0xff); - key.extend_from_slice( - content_type - .as_ref() - .map(|c| c.as_bytes()) - .unwrap_or_default(), - ); - self.mediaid_file.insert(&key, &[])?; + self.filehash_thumbnailid.insert(&key, &[])?; - Ok(key) + let mut key = servername.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + key.push(0xff); + key.extend_from_slice(&width.to_be_bytes()); + key.extend_from_slice(&height.to_be_bytes()); + + let mut value = sha256_digest.to_vec(); + value.extend_from_slice(filename.map(|f| f.as_bytes()).unwrap_or_default()); + value.push(0xff); + value.extend_from_slice(content_type.map(|f| f.as_bytes()).unwrap_or_default()); + + self.thumbnailid_metadata.insert(&key, &value) } - fn search_file_metadata( + fn search_thumbnail_metadata( &self, - mxc: String, + servername: &ServerName, + media_id: &str, width: u32, height: u32, - ) -> Result<(ContentDisposition, Option, Vec)> { - let mut prefix = mxc.as_bytes().to_vec(); - prefix.push(0xff); - prefix.extend_from_slice(&width.to_be_bytes()); - prefix.extend_from_slice(&height.to_be_bytes()); - prefix.push(0xff); + ) -> Result { + let mut key = 
servername.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + key.push(0xff); + key.extend_from_slice(&width.to_be_bytes()); + key.extend_from_slice(&height.to_be_bytes()); - let (key, _) = self - .mediaid_file - .scan_prefix(prefix) - .next() - .ok_or(Error::BadRequest(ErrorKind::NotFound, "Media not found"))?; + let value = self + .thumbnailid_metadata + .get(&key)? + .ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found."))?; - let mut parts = key.rsplit(|&b| b == 0xff); + let metadata = parse_metadata(&value).inspect_err(|e| { + error!("Error parsing metadata for thumbnail \"mxc://{servername}/{media_id}\" with dimensions {width}x{height} from thumbnailid_metadata: {e}"); + })?; - let content_type = parts - .next() - .map(|bytes| { - utils::string_from_bytes(bytes).map_err(|_| { - Error::bad_database("Content type in mediaid_file is invalid unicode.") - }) - }) - .transpose()?; - - let content_disposition_bytes = parts - .next() - .ok_or_else(|| Error::bad_database("Media ID in db is invalid."))?; - - let content_disposition = content_disposition_bytes.try_into().unwrap_or_else(|_| { - ContentDisposition::new(ruma::http_headers::ContentDispositionType::Inline) - }); - Ok((content_disposition, content_type, key)) + // Only assume file is available if there is metadata about the filehash itself + self.filehash_metadata + .get(&metadata.sha256_digest)? + .map(|_| metadata) + .ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found.")) + } +} + +fn parse_metadata(value: &[u8]) -> Result { + let (sha256_digest, mut parts) = value + .split_at_checked(32) + .map(|(digest, value)| (digest.to_vec(), value.split(|&b| b == 0xff))) + .ok_or_else(|| Error::BadDatabase("Invalid format for media metadata"))?; + + let filename = parts + .next() + .map(|bytes| { + utils::string_from_bytes(bytes) + .map_err(|_| Error::BadDatabase("filename in media metadata is invalid unicode")) + }) + .transpose()? + .and_then(|s| (!s.is_empty()).then_some(s)); + + let content_type = parts + .next() + .map(|bytes| { + utils::string_from_bytes(bytes).map_err(|_| { + Error::BadDatabase("content type in media metadata is invalid unicode") + }) + }) + .transpose()? 
+ .and_then(|s| (!s.is_empty()).then_some(s));
+
+ let unauthenticated_access_permitted = parts.next().is_some_and(|v| v.is_empty());
+
+ Ok(DbFileMeta {
+ sha256_digest,
+ filename,
+ content_type,
+ unauthenticated_access_permitted,
+ })
+}
+
+pub struct FilehashMetadata {
+ value: Vec<u8>,
+}
+
+impl FilehashMetadata {
+ pub fn new_with_times(size: u64, creation: u64, last_access: u64) -> Self {
+ let mut value = size.to_be_bytes().to_vec();
+ value.extend_from_slice(&creation.to_be_bytes());
+ value.extend_from_slice(&last_access.to_be_bytes());
+
+ Self { value }
+ }
+
+ pub fn new(size: u64) -> Self {
+ let now = utils::secs_since_unix_epoch();
+
+ let mut value = size.to_be_bytes().to_vec();
+ value.extend_from_slice(&now.to_be_bytes());
+ value.extend_from_slice(&now.to_be_bytes());
+
+ Self { value }
+ }
+
+ pub fn value(&self) -> &[u8] {
+ &self.value
+ }
+}
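The value stored in `filehash_metadata` is a fixed-width 24-byte record: file size, creation time, and last access time, each encoded as a big-endian `u64`. A minimal sketch of the corresponding decoder (hypothetical; this patch only writes the record, it never reads it back yet):

```
// Hypothetical inverse of FilehashMetadata (not part of this patch):
// splits the 24-byte value back into (size, creation, last_access).
fn decode_filehash_metadata(value: &[u8]) -> Option<(u64, u64, u64)> {
    let size = u64::from_be_bytes(value.get(0..8)?.try_into().ok()?);
    let creation = u64::from_be_bytes(value.get(8..16)?.try_into().ok()?);
    let last_access = u64::from_be_bytes(value.get(16..24)?.try_into().ok()?);
    Some((size, creation, last_access))
}
```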
diff --git a/src/database/key_value/mod.rs b/src/database/key_value/mod.rs
index c4496af8..f5f263e9 100644
--- a/src/database/key_value/mod.rs
+++ b/src/database/key_value/mod.rs
@@ -3,7 +3,7 @@ mod account_data;
 mod appservice;
 mod globals;
 mod key_backups;
-mod media;
+pub(super) mod media;
 //mod pdu;
 mod pusher;
 mod rooms;
diff --git a/src/database/mod.rs b/src/database/mod.rs
index e452fb5d..29520827 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -2,12 +2,13 @@ pub mod abstraction;
 pub mod key_value;
 
 use crate::{
- service::rooms::timeline::PduCount, services, utils, Config, Error, PduEvent, Result, Services,
- SERVICES,
+ service::{globals, rooms::timeline::PduCount},
+ services, utils, Config, Error, PduEvent, Result, Services, SERVICES,
};
 use abstraction::{KeyValueDatabaseEngine, KvTree};
 use base64::{engine::general_purpose, Engine};
 use directories::ProjectDirs;
+use key_value::media::FilehashMetadata;
 use lru_cache::LruCache;
 
 use ruma::{
@@ -17,23 +18,50 @@ use ruma::{
 GlobalAccountDataEvent, GlobalAccountDataEventType, StateEventType,
 },
 push::Ruleset,
- CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedRoomId, OwnedUserId, RoomId,
- UserId,
+ CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedMxcUri, OwnedRoomId,
+ OwnedUserId, RoomId, UserId,
};
 use serde::Deserialize;
+use sha2::{Digest, Sha256};
 use std::{
 collections::{BTreeMap, HashMap, HashSet},
 fs::{self, remove_dir_all},
 io::Write,
 mem::size_of,
- path::Path,
+ path::{Path, PathBuf},
 sync::{Arc, Mutex, RwLock},
- time::Duration,
+ time::{Duration, UNIX_EPOCH},
};
-use tokio::time::interval;
+use tokio::{io::AsyncReadExt, time::interval};
 
 use tracing::{debug, error, info, warn};
 
+/// This trait should only be used for migrations, and hence should never be made "pub"
+trait GlobalsMigrationsExt {
+ /// As the name states, old version of `get_media_file`, only for usage in migrations
+ fn get_media_file_old_only_use_for_migrations(&self, key: &[u8]) -> PathBuf;
+
+ /// As the name states, this should only be used for migrations.
+ fn get_media_folder_only_use_for_migrations(&self) -> PathBuf;
+}
+
+impl GlobalsMigrationsExt for globals::Service {
+ fn get_media_file_old_only_use_for_migrations(&self, key: &[u8]) -> PathBuf {
+ let mut r = PathBuf::new();
+ r.push(self.config.database_path.clone());
+ r.push("media");
+ r.push(general_purpose::URL_SAFE_NO_PAD.encode(key));
+ r
+ }
+
+ fn get_media_folder_only_use_for_migrations(&self) -> PathBuf {
+ let mut r = PathBuf::new();
+ r.push(self.config.database_path.clone());
+ r.push("media");
+ r
+ }
+}
+
 pub struct KeyValueDatabase {
 _db: Arc<dyn KeyValueDatabaseEngine>,
 
@@ -148,7 +176,11 @@ pub struct KeyValueDatabase {
 pub(super) roomusertype_roomuserdataid: Arc<dyn KvTree>, // RoomUserType = Room + User + Type
 
 //pub media: media::Media,
- pub(super) mediaid_file: Arc<dyn KvTree>, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType
+ pub(super) servernamemediaid_metadata: Arc<dyn KvTree>, // Servername + MediaID -> content sha256 + Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints
+ pub(super) filehash_servername_mediaid: Arc<dyn KvTree>, // sha256 of content + Servername + MediaID, used to delete dangling references to filehashes from servernamemediaid
+ pub(super) filehash_metadata: Arc<dyn KvTree>, // sha256 of content -> file size + creation time + last access time
+ pub(super) thumbnailid_metadata: Arc<dyn KvTree>, // ThumbnailId = Servername + MediaID + width + height -> Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints
+ pub(super) filehash_thumbnailid: Arc<dyn KvTree>, // sha256 of content + "ThumbnailId", as defined above. Used to delete dangling references to filehashes from thumbnail IDs
 
 //pub key_backups: key_backups::KeyBackups,
 pub(super) backupid_algorithm: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
 pub(super) backupid_etag: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
@@ -352,7 +384,11 @@ impl KeyValueDatabase {
 referencedevents: builder.open_tree("referencedevents")?,
 roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?,
 roomusertype_roomuserdataid: builder.open_tree("roomusertype_roomuserdataid")?,
- mediaid_file: builder.open_tree("mediaid_file")?,
+ servernamemediaid_metadata: builder.open_tree("servernamemediaid_metadata")?,
+ filehash_servername_mediaid: builder.open_tree("filehash_servername_mediaid")?,
+ filehash_metadata: builder.open_tree("filehash_metadata")?,
+ thumbnailid_metadata: builder.open_tree("thumbnailid_metadata")?,
+ filehash_thumbnailid: builder.open_tree("filehash_thumbnailid")?,
 backupid_algorithm: builder.open_tree("backupid_algorithm")?,
 backupid_etag: builder.open_tree("backupid_etag")?,
 backupkeyid_backup: builder.open_tree("backupkeyid_backup")?,
@@ -415,7 +451,7 @@ impl KeyValueDatabase {
 }
 
 // If the database has any data, perform data migrations before starting
- let latest_database_version = 16;
+ let latest_database_version = 17;
 
 if services().users.count()? > 0 {
 // MIGRATIONS
@@ -462,16 +498,19 @@ impl KeyValueDatabase {
 }
 
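One consequence of the mirrored `filehash_servername_mediaid` index introduced above is that reference checks become a single prefix scan. An illustrative helper (not part of this patch; the name is hypothetical) shows the intended access pattern:

```
// Illustrative only: a content hash is safe to delete once no
// (servername, media_id) pair references it, which is one prefix
// scan over filehash_servername_mediaid.
fn filehash_is_referenced(db: &KeyValueDatabase, sha256_digest: &[u8]) -> bool {
    db.filehash_servername_mediaid
        .scan_prefix(sha256_digest.to_vec())
        .next()
        .is_some()
}
```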
 if services().globals.database_version()? < 3 {
+ let tree = db._db.open_tree("mediaid_file")?;
 // Move media to filesystem
- for (key, content) in db.mediaid_file.iter() {
+ for (key, content) in tree.iter() {
 if content.is_empty() {
 continue;
 }
 
- let path = services().globals.get_media_file(&key);
+ let path = services()
+ .globals
+ .get_media_file_old_only_use_for_migrations(&key);
 let mut file = fs::File::create(path)?;
 file.write_all(&content)?;
- db.mediaid_file.insert(&key, &[])?;
+ tree.insert(&key, &[])?;
 }
 
 services().globals.bump_database_version(3)?;
@@ -933,16 +972,23 @@ impl KeyValueDatabase {
 }
 
 if services().globals.database_version()? < 16 {
+ let tree = db._db.open_tree("mediaid_file")?;
 // Reconstruct all media using the filesystem
- db.mediaid_file.clear().unwrap();
+ tree.clear().unwrap();
 
- for file in fs::read_dir(services().globals.get_media_folder()).unwrap() {
+ for file in fs::read_dir(
+ services()
+ .globals
+ .get_media_folder_only_use_for_migrations(),
+ )
+ .unwrap()
+ {
 let file = file.unwrap();
 let file_name = file.file_name().into_string().unwrap();
 
 let mediaid = general_purpose::URL_SAFE_NO_PAD.decode(&file_name).unwrap();
 
- if let Err(e) = migrate_content_disposition_format(mediaid, db) {
+ if let Err(e) = migrate_content_disposition_format(mediaid, &tree) {
 error!("Error migrating media file with name \"{file_name}\": {e}");
 return Err(e);
 }
@@ -952,6 +998,55 @@ impl KeyValueDatabase {
 warn!("Migration: 13 -> 16 finished");
 }
 
+ if services().globals.database_version()? < 17 {
+ warn!("Migrating media repository to new format. If you have a lot of media stored, this may take a while, so please be patient!");
+
+ let tree = db._db.open_tree("mediaid_file")?;
+ tree.clear().unwrap();
+
+ let mxc_prefix = general_purpose::URL_SAFE_NO_PAD.encode(b"mxc://");
+ for file in fs::read_dir(
+ services()
+ .globals
+ .get_media_folder_only_use_for_migrations(),
+ )
+ .unwrap()
+ .filter_map(Result::ok)
+ .filter(|result| {
+ result.file_type().unwrap().is_file()
+ && result
+ .file_name()
+ .to_str()
+ .unwrap()
+ .starts_with(&mxc_prefix)
+ }) {
+ let file_name = file.file_name().into_string().unwrap();
+
+ if let Err(e) = migrate_to_sha256_media(
+ db,
+ &file_name,
+ file.metadata()
+ .ok()
+ .and_then(|meta| meta.created().ok())
+ .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
+ .map(|dur| dur.as_secs()),
+ file.metadata()
+ .ok()
+ .and_then(|meta| meta.accessed().ok())
+ .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
+ .map(|dur| dur.as_secs()),
+ )
+ .await
+ {
+ error!("Error migrating media file with name \"{file_name}\": {e}");
+ return Err(e);
+ }
+ }
+ services().globals.bump_database_version(17)?;
+
+ warn!("Migration: 16 -> 17 finished");
+ }
+
 assert_eq!(
 services().globals.database_version().unwrap(),
 latest_database_version
@@ -1117,7 +1212,7 @@ impl KeyValueDatabase {
 
 fn migrate_content_disposition_format(
 mediaid: Vec<u8>,
- db: &KeyValueDatabase,
+ tree: &Arc<dyn KvTree>,
) -> Result<(), Error> {
 let mut parts = mediaid.rsplit(|&b| b == 0xff);
 let mut removed_bytes = 0;
@@ -1153,28 +1248,165 @@ fn migrate_content_disposition_format(
 // Some file names are too long. Ignore those.
match fs::rename( - services().globals.get_media_file(&mediaid), - services().globals.get_media_file(&new_key), + services() + .globals + .get_media_file_old_only_use_for_migrations(&mediaid), + services() + .globals + .get_media_file_old_only_use_for_migrations(&new_key), ) { Ok(_) => { - db.mediaid_file.insert(&new_key, &[])?; + tree.insert(&new_key, &[])?; } Err(_) => { fs::rename( - services().globals.get_media_file(&mediaid), - services().globals.get_media_file(&shorter_key), + services() + .globals + .get_media_file_old_only_use_for_migrations(&mediaid), + services() + .globals + .get_media_file_old_only_use_for_migrations(&shorter_key), ) .unwrap(); - db.mediaid_file.insert(&shorter_key, &[])?; + tree.insert(&shorter_key, &[])?; } } } else { - db.mediaid_file.insert(&mediaid, &[])?; + tree.insert(&mediaid, &[])?; }; Ok(()) } +async fn migrate_to_sha256_media( + db: &KeyValueDatabase, + file_name: &str, + creation: Option, + last_accessed: Option, +) -> Result<()> { + use crate::service::media::size; + + let media_info = general_purpose::URL_SAFE_NO_PAD.decode(file_name).unwrap(); + + let mxc_dimension_splitter_pos = media_info + .iter() + .position(|&b| b == 0xff) + .ok_or_else(|| Error::BadDatabase("Invalid format of media info from file's name"))?; + + let mxc = utils::string_from_bytes(&media_info[..mxc_dimension_splitter_pos]) + .map(OwnedMxcUri::from) + .map_err(|_| Error::BadDatabase("MXC from file's name is invalid UTF-8."))?; + let (server_name, media_id) = mxc + .parts() + .map_err(|_| Error::BadDatabase("MXC from file's name is invalid."))?; + + let width_height = media_info + .get(mxc_dimension_splitter_pos + 1..mxc_dimension_splitter_pos + 9) + .ok_or_else(|| Error::BadDatabase("Invalid format of media info from file's name"))?; + + let mut parts = media_info + .get(mxc_dimension_splitter_pos + 10..) + .ok_or_else(|| Error::BadDatabase("Invalid format of media info from file's name"))? + .split(|&b| b == 0xff); + + let content_disposition_bytes = parts.next().ok_or_else(|| { + Error::BadDatabase( + "Media ID parsed from file's name is invalid: Missing Content Disposition.", + ) + })?; + + let content_disposition = content_disposition_bytes.try_into().unwrap_or_else(|_| { + ruma::http_headers::ContentDisposition::new( + ruma::http_headers::ContentDispositionType::Inline, + ) + }); + + let content_type = parts + .next() + .map(|bytes| { + utils::string_from_bytes(bytes) + .map_err(|_| Error::BadDatabase("Content type from file's name is invalid UTF-8.")) + }) + .transpose()?; + + let mut path = services() + .globals + .get_media_folder_only_use_for_migrations(); + path.push(file_name); + + let mut file = Vec::new(); + + tokio::fs::File::open(&path) + .await? 
+ .read_to_end(&mut file) + .await?; + let sha256_digest = Sha256::digest(&file); + + let mut zero_zero = 0u32.to_be_bytes().to_vec(); + zero_zero.extend_from_slice(&0u32.to_be_bytes()); + + let mut key = sha256_digest.to_vec(); + + let now = utils::secs_since_unix_epoch(); + let metadata = FilehashMetadata::new_with_times( + size(&file)?, + creation.unwrap_or(now), + last_accessed.unwrap_or(now), + ); + + db.filehash_metadata.insert(&key, metadata.value())?; + + // If not a thumbnail + if width_height == zero_zero { + key.extend_from_slice(server_name.as_bytes()); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + db.filehash_servername_mediaid.insert(&key, &[])?; + + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + let mut value = sha256_digest.to_vec(); + value.extend_from_slice(content_disposition.filename.unwrap_or_default().as_bytes()); + value.push(0xff); + value.extend_from_slice(content_type.unwrap_or_default().as_bytes()); + // To mark as available on unauthenticated endpoints + value.push(0xff); + + db.servernamemediaid_metadata.insert(&key, &value)?; + } else { + key.extend_from_slice(server_name.as_bytes()); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + key.push(0xff); + key.extend_from_slice(width_height); + + db.filehash_thumbnailid.insert(&key, &[])?; + + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + key.push(0xff); + key.extend_from_slice(width_height); + + let mut value = sha256_digest.to_vec(); + value.extend_from_slice(content_disposition.filename.unwrap_or_default().as_bytes()); + value.push(0xff); + value.extend_from_slice(content_type.unwrap_or_default().as_bytes()); + // To mark as available on unauthenticated endpoints + value.push(0xff); + + db.thumbnailid_metadata.insert(&key, &value)?; + } + + crate::service::media::create_file(&hex::encode(sha256_digest), &file).await?; + tokio::fs::remove_file(path).await?; + + Ok(()) +} + /// Sets the emergency password and push rules for the @conduit account in case emergency password is set fn set_emergency_access() -> Result { let conduit_user = services().globals.server_user(); diff --git a/src/main.rs b/src/main.rs index 6ce5f822..96aa2714 100644 --- a/src/main.rs +++ b/src/main.rs @@ -45,7 +45,7 @@ use tikv_jemallocator::Jemalloc; #[global_allocator] static GLOBAL: Jemalloc = Jemalloc; -static SUB_TABLES: [&str; 2] = ["well_known", "tls"]; // Not doing `proxy` cause setting that with env vars would be a pain +static SUB_TABLES: [&str; 3] = ["well_known", "tls", "media"]; // Not doing `proxy` cause setting that with env vars would be a pain #[tokio::main] async fn main() { diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 4f401ad3..d7cf19b1 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -7,7 +7,10 @@ use ruma::{ use crate::api::server_server::DestinationResponse; -use crate::{config::TurnConfig, services, Config, Error, Result}; +use crate::{ + config::{MediaConfig, TurnConfig}, + services, Config, Error, Result, +}; use futures_util::FutureExt; use hickory_resolver::TokioAsyncResolver; use hyper_util::client::legacy::connect::dns::{GaiResolver, Name as HyperName}; @@ -35,8 +38,6 @@ use tokio::sync::{broadcast, watch::Receiver, Mutex, RwLock, Semaphore}; use tower_service::Service as TowerService; use tracing::{error, info}; -use base64::{engine::general_purpose, Engine as _}; - type WellKnownMap = 
HashMap<OwnedServerName, DestinationResponse>;
type TlsNameMap = HashMap<String, (Vec<IpAddr>, u16)>;
type RateLimitState = (Instant, u32); // Time of last failed try, number of failed tries
@@ -227,7 +228,11 @@ impl Service {
 shutdown: AtomicBool::new(false),
 };
 
- fs::create_dir_all(s.get_media_folder())?;
+ // Remove this exception once other media backends are added
+ #[allow(irrefutable_let_patterns)]
+ if let MediaConfig::FileSystem { path } = &s.config.media {
+ fs::create_dir_all(path)?;
+ }
 
 if !s
 .supported_room_versions()
@@ -477,18 +482,13 @@ impl Service {
 self.db.bump_database_version(new_version)
 }
 
- pub fn get_media_folder(&self) -> PathBuf {
+ pub fn get_media_path(&self, media_directory: &str, sha256_hex: &str) -> PathBuf {
 let mut r = PathBuf::new();
- r.push(self.config.database_path.clone());
- r.push("media");
- r
- }
+ r.push(media_directory);
+
+ //TODO: Directory distribution
+ r.push(sha256_hex);
 
- pub fn get_media_file(&self, key: &[u8]) -> PathBuf {
- let mut r = PathBuf::new();
- r.push(self.config.database_path.clone());
- r.push("media");
- r.push(general_purpose::URL_SAFE_NO_PAD.encode(key));
 r
 }
 
diff --git a/src/service/media/data.rs b/src/service/media/data.rs
index 844aa995..254cee83 100644
--- a/src/service/media/data.rs
+++ b/src/service/media/data.rs
@@ -1,22 +1,43 @@
-use ruma::http_headers::ContentDisposition;
+use ruma::ServerName;
+use sha2::{digest::Output, Sha256};
 
 use crate::Result;
 
+use super::DbFileMeta;
+
 pub trait Data: Send + Sync {
 fn create_file_metadata(
 &self,
- mxc: String,
- width: u32,
- height: u32,
- content_disposition: &ContentDisposition,
+ sha256_digest: Output<Sha256>,
+ file_size: u64,
+ servername: &ServerName,
+ media_id: &str,
+ filename: Option<&str>,
 content_type: Option<&str>,
- ) -> Result<Vec<u8>>;
+ ) -> Result<()>;
 
- /// Returns content_disposition, content_type and the metadata key.
- fn search_file_metadata(
+ fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result<DbFileMeta>;
+
+ #[allow(clippy::too_many_arguments)]
+ fn create_thumbnail_metadata(
 &self,
- mxc: String,
+ sha256_digest: Output<Sha256>,
+ file_size: u64,
+ servername: &ServerName,
+ media_id: &str,
 width: u32,
 height: u32,
- ) -> Result<(ContentDisposition, Option<String>, Vec<u8>)>;
+ filename: Option<&str>,
+ content_type: Option<&str>,
+ ) -> Result<()>;
+
+ /// Returns the sha256 hash, filename and content type, and whether the media should be accessible via
+ /// unauthenticated endpoints.
+ fn search_thumbnail_metadata(
+ &self,
+ servername: &ServerName,
+ media_id: &str,
+ width: u32,
+ height: u32,
+ ) -> Result<DbFileMeta>;
}
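The storage side of this patch is content-addressed: the on-disk name is simply the hex-encoded SHA-256 of the file's bytes, so identical uploads collapse to a single file. A tiny sketch of that naming scheme, using the same `sha2` and `hex` crates added in Cargo.toml above:

```
use sha2::{Digest, Sha256};

// Two uploads with identical bytes produce the same on-disk name,
// so the content is stored only once.
fn on_disk_name(file: &[u8]) -> String {
    hex::encode(Sha256::digest(file))
}
```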
diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs
index fed7c6b9..7fe21cc3 100644
--- a/src/service/media/mod.rs
+++ b/src/service/media/mod.rs
@@ -3,16 +3,25 @@ use std::io::Cursor;
 
 pub use data::Data;
 use ruma::{
- api::client::error::ErrorKind,
+ api::client::{error::ErrorKind, media::is_safe_inline_content_type},
 http_headers::{ContentDisposition, ContentDispositionType},
+ ServerName,
};
+use sha2::{digest::Output, Digest, Sha256};
 
-use crate::{services, Result};
+use crate::{config::MediaConfig, services, Error, Result};
 use image::imageops::FilterType;
 
+pub struct DbFileMeta {
+ pub sha256_digest: Vec<u8>,
+ pub filename: Option<String>,
+ pub content_type: Option<String>,
+ pub unauthenticated_access_permitted: bool,
+}
+
 use tokio::{
 fs::File,
- io::{AsyncReadExt, AsyncWriteExt, BufReader},
+ io::{AsyncReadExt, AsyncWriteExt},
};
 
 pub struct FileMeta {
@@ -29,69 +38,70 @@ impl Service {
 /// Uploads a file.
 pub async fn create(
 &self,
- mxc: String,
- content_disposition: Option<ContentDisposition>,
+ servername: &ServerName,
+ media_id: &str,
+ filename: Option<&str>,
 content_type: Option<&str>,
 file: &[u8],
 ) -> Result<()> {
- let content_disposition =
- content_disposition.unwrap_or(ContentDisposition::new(ContentDispositionType::Inline));
+ let (sha256_digest, sha256_hex) = generate_digests(file);
 
- // Width, Height = 0 if it's not a thumbnail
- let key = self
- .db
- .create_file_metadata(mxc, 0, 0, &content_disposition, content_type)?;
+ self.db.create_file_metadata(
+ sha256_digest,
+ size(file)?,
+ servername,
+ media_id,
+ filename,
+ content_type,
+ )?;
 
- let path = services().globals.get_media_file(&key);
- let mut f = File::create(path).await?;
- f.write_all(file).await?;
- Ok(())
+ create_file(&sha256_hex, file).await
 }
 
 /// Uploads or replaces a file thumbnail.
 #[allow(clippy::too_many_arguments)]
 pub async fn upload_thumbnail(
 &self,
- mxc: String,
+ servername: &ServerName,
+ media_id: &str,
+ filename: Option<&str>,
 content_type: Option<&str>,
 width: u32,
 height: u32,
 file: &[u8],
 ) -> Result<()> {
- let key = self.db.create_file_metadata(
- mxc,
+ let (sha256_digest, sha256_hex) = generate_digests(file);
+
+ self.db.create_thumbnail_metadata(
+ sha256_digest,
+ size(file)?,
+ servername,
+ media_id,
 width,
 height,
- &ContentDisposition::new(ContentDispositionType::Inline),
+ filename,
 content_type,
 )?;
 
- let path = services().globals.get_media_file(&key);
- let mut f = File::create(path).await?;
- f.write_all(file).await?;
-
- Ok(())
+ create_file(&sha256_hex, file).await
 }
 
- /// Downloads a file.
- pub async fn get(&self, mxc: String) -> Result<Option<FileMeta>> {
- if let Ok((content_disposition, content_type, key)) =
- self.db.search_file_metadata(mxc, 0, 0)
- {
- let path = services().globals.get_media_file(&key);
- let mut file = Vec::new();
- BufReader::new(File::open(path).await?)
- .read_to_end(&mut file)
- .await?;
+ /// Fetches a local file and its metadata
+ pub async fn get(&self, servername: &ServerName, media_id: &str) -> Result<Option<FileMeta>> {
+ let DbFileMeta {
+ sha256_digest,
+ filename,
+ content_type,
+ unauthenticated_access_permitted: _,
+ } = self.db.search_file_metadata(servername, media_id)?;
 
- Ok(Some(FileMeta {
- content_disposition,
- content_type,
- file,
- }))
- } else {
- Ok(None)
- }
+ let file = get_file(&hex::encode(sha256_digest)).await?;
+
+ Ok(Some(FileMeta {
+ content_disposition: content_disposition(filename, &content_type),
+ content_type,
+ file,
+ }))
 }
 
 /// Returns width, height of the thumbnail and whether it should be cropped. Returns None when
@@ -119,117 +129,206 @@ impl Service {
 /// For width,height <= 96 the server uses another thumbnailing algorithm which crops the image afterwards.
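For readers unfamiliar with the size buckets the comment above refers to: `thumbnail_properties` (unchanged by this patch, and therefore absent from the diff) rounds a requested size up to a fixed `(width, height, crop)` triple. A sketch of that bucketing, assuming the Matrix spec's standard thumbnail dimensions:

```
// Illustrative bucketing: requests round up to a fixed triple, larger
// requests fall through to the original file (None). Sizes here are the
// Matrix spec's standard thumbnail dimensions, assumed to match
// thumbnail_properties.
fn bucket(width: u32, height: u32) -> Option<(u32, u32, bool)> {
    match (width, height) {
        (0..=32, 0..=32) => Some((32, 32, true)),
        (0..=96, 0..=96) => Some((96, 96, true)),
        (0..=320, 0..=240) => Some((320, 240, false)),
        (0..=640, 0..=480) => Some((640, 480, false)),
        (0..=800, 0..=600) => Some((800, 600, false)),
        _ => None,
    }
}
```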
pub async fn get_thumbnail( &self, - mxc: String, + servername: &ServerName, + media_id: &str, width: u32, height: u32, ) -> Result> { - let (width, height, crop) = self - .thumbnail_properties(width, height) - .unwrap_or((0, 0, false)); // 0, 0 because that's the original file - - if let Ok((content_disposition, content_type, key)) = - self.db.search_file_metadata(mxc.clone(), width, height) - { - // Using saved thumbnail - let path = services().globals.get_media_file(&key); - let mut file = Vec::new(); - File::open(path).await?.read_to_end(&mut file).await?; - - Ok(Some(FileMeta { - content_disposition, + if let Some((width, height, crop)) = self.thumbnail_properties(width, height) { + if let Ok(DbFileMeta { + sha256_digest, + filename, content_type, - file: file.to_vec(), - })) - } else if let Ok((content_disposition, content_type, key)) = - self.db.search_file_metadata(mxc.clone(), 0, 0) - { - // Generate a thumbnail - let path = services().globals.get_media_file(&key); - let mut file = Vec::new(); - File::open(path).await?.read_to_end(&mut file).await?; - - if let Ok(image) = image::load_from_memory(&file) { - let original_width = image.width(); - let original_height = image.height(); - if width > original_width || height > original_height { - return Ok(Some(FileMeta { - content_disposition, - content_type, - file: file.to_vec(), - })); - } - - let thumbnail = if crop { - image.resize_to_fill(width, height, FilterType::CatmullRom) - } else { - let (exact_width, exact_height) = { - // Copied from image::dynimage::resize_dimensions - let ratio = u64::from(original_width) * u64::from(height); - let nratio = u64::from(width) * u64::from(original_height); - - let use_width = nratio <= ratio; - let intermediate = if use_width { - u64::from(original_height) * u64::from(width) - / u64::from(original_width) - } else { - u64::from(original_width) * u64::from(height) - / u64::from(original_height) - }; - if use_width { - if intermediate <= u64::from(u32::MAX) { - (width, intermediate as u32) - } else { - ( - (u64::from(width) * u64::from(u32::MAX) / intermediate) as u32, - u32::MAX, - ) - } - } else if intermediate <= u64::from(u32::MAX) { - (intermediate as u32, height) - } else { - ( - u32::MAX, - (u64::from(height) * u64::from(u32::MAX) / intermediate) as u32, - ) - } - }; - - image.thumbnail_exact(exact_width, exact_height) - }; - - let mut thumbnail_bytes = Vec::new(); - thumbnail.write_to( - &mut Cursor::new(&mut thumbnail_bytes), - image::ImageFormat::Png, - )?; - - // Save thumbnail in database so we don't have to generate it again next time - let thumbnail_key = self.db.create_file_metadata( - mxc, - width, - height, - &content_disposition, - content_type.as_deref(), - )?; - - let path = services().globals.get_media_file(&thumbnail_key); - let mut f = File::create(path).await?; - f.write_all(&thumbnail_bytes).await?; + unauthenticated_access_permitted: _, + }) = self + .db + .search_thumbnail_metadata(servername, media_id, width, height) + { + // Using saved thumbnail + let file = get_file(&hex::encode(sha256_digest)).await?; Ok(Some(FileMeta { - content_disposition, + content_disposition: content_disposition(filename, &content_type), content_type, - file: thumbnail_bytes.to_vec(), + file, })) + } else if let Ok(DbFileMeta { + sha256_digest, + filename, + content_type, + unauthenticated_access_permitted: _, + }) = self.db.search_file_metadata(servername, media_id) + { + let content_disposition = content_disposition(filename.clone(), &content_type); + // Generate a thumbnail + let file = 
get_file(&hex::encode(sha256_digest)).await?; + + if let Ok(image) = image::load_from_memory(&file) { + let original_width = image.width(); + let original_height = image.height(); + if width > original_width || height > original_height { + return Ok(Some(FileMeta { + content_disposition, + content_type, + file, + })); + } + + let thumbnail = if crop { + image.resize_to_fill(width, height, FilterType::CatmullRom) + } else { + let (exact_width, exact_height) = { + // Copied from image::dynimage::resize_dimensions + let ratio = u64::from(original_width) * u64::from(height); + let nratio = u64::from(width) * u64::from(original_height); + + let use_width = nratio <= ratio; + let intermediate = if use_width { + u64::from(original_height) * u64::from(width) + / u64::from(original_width) + } else { + u64::from(original_width) * u64::from(height) + / u64::from(original_height) + }; + if use_width { + if intermediate <= u64::from(u32::MAX) { + (width, intermediate as u32) + } else { + ( + (u64::from(width) * u64::from(u32::MAX) / intermediate) + as u32, + u32::MAX, + ) + } + } else if intermediate <= u64::from(u32::MAX) { + (intermediate as u32, height) + } else { + ( + u32::MAX, + (u64::from(height) * u64::from(u32::MAX) / intermediate) as u32, + ) + } + }; + + image.thumbnail_exact(exact_width, exact_height) + }; + + let mut thumbnail_bytes = Vec::new(); + thumbnail.write_to( + &mut Cursor::new(&mut thumbnail_bytes), + image::ImageFormat::Png, + )?; + + // Save thumbnail in database so we don't have to generate it again next time + self.upload_thumbnail( + servername, + media_id, + filename.as_deref(), + content_type.as_deref(), + width, + height, + &thumbnail_bytes, + ) + .await?; + + Ok(Some(FileMeta { + content_disposition, + content_type, + file: thumbnail_bytes, + })) + } else { + // Couldn't parse file to generate thumbnail, likely not an image + Err(Error::BadRequest( + ErrorKind::Unknown, + "Unable to generate thumbnail for the requested content (likely is not an image)", + )) + } } else { - // Couldn't parse file to generate thumbnail, likely not an image - return Err(crate::Error::BadRequest( - ErrorKind::Unknown, - "Unable to generate thumbnail for the requested content (likely is not an image)", - )); + Ok(None) } } else { - Ok(None) + // Using full-sized file + let Ok(DbFileMeta { + sha256_digest, + filename, + content_type, + unauthenticated_access_permitted: _, + }) = self.db.search_file_metadata(servername, media_id) + else { + return Ok(None); + }; + + let file = get_file(&hex::encode(sha256_digest)).await?; + + Ok(Some(FileMeta { + content_disposition: content_disposition(filename, &content_type), + content_type, + file, + })) } } } + +/// Creates the media file, using the configured media backend +/// +/// Note: this function does NOT set the metadata related to the file +pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> { + match &services().globals.config.media { + MediaConfig::FileSystem { path } => { + let path = services().globals.get_media_path(path, sha256_hex); + + let mut f = File::create(path).await?; + f.write_all(file).await?; + } + } + + Ok(()) +} + +/// Fetches the file from the configured media backend +async fn get_file(sha256_hex: &str) -> Result> { + Ok(match &services().globals.config.media { + MediaConfig::FileSystem { path } => { + let path = services().globals.get_media_path(path, sha256_hex); + + let mut file = Vec::new(); + File::open(path).await?.read_to_end(&mut file).await?; + + file + } + }) +} + +/// Creates a content disposition 
with the given `filename`, using the `content_type` to determine whether
+/// the disposition should be `inline` or `attachment`
+fn content_disposition(
+ filename: Option<String>,
+ content_type: &Option<String>,
+) -> ContentDisposition {
+ ContentDisposition::new(
+ if content_type
+ .as_deref()
+ .is_some_and(is_safe_inline_content_type)
+ {
+ ContentDispositionType::Inline
+ } else {
+ ContentDispositionType::Attachment
+ },
+ )
+ .with_filename(filename)
+}
+
+/// Returns sha256 digests of the file, in raw (`Vec<u8>`) and hex form respectively
+fn generate_digests(file: &[u8]) -> (Output<Sha256>, String) {
+ let sha256_digest = Sha256::digest(file);
+ let hex_sha256 = hex::encode(sha256_digest);
+
+ (sha256_digest, hex_sha256)
+}
+
+/// Gets the file size, in bytes, as a u64, returning an error if the file size is larger
+/// than a u64 (which is far too big to be reasonably uploaded in the first place anyway)
+pub fn size(file: &[u8]) -> Result<u64> {
+ u64::try_from(file.len())
+ .map_err(|_| Error::BadRequest(ErrorKind::TooLarge, "File is too large"))
+}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index d09a1033..69ec809f 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -18,6 +18,13 @@ pub fn millis_since_unix_epoch() -> u64 {
 .as_millis() as u64
 }
 
+pub fn secs_since_unix_epoch() -> u64 {
+ SystemTime::now()
+ .duration_since(UNIX_EPOCH)
+ .expect("time is valid")
+ .as_secs()
+}
+
 pub fn increment(old: Option<&[u8]>) -> Option<Vec<u8>> {
 let number = match old.map(|bytes| bytes.try_into()) {
 Some(Ok(bytes)) => {
From 66a14ac8027e1a00cc23473b178b7e0ba43067ee Mon Sep 17 00:00:00 2001
From: Matthias Ahouansou
Date: Sun, 23 Mar 2025 15:57:17 +0000
Subject: [PATCH 04/15] feat: freeze unauthenticated media

---
 src/api/client_server/media.rs | 31 ++++++++++++++++++++++++++-----
 src/api/server_server.rs | 3 ++-
 src/service/media/mod.rs | 34 +++++++++++++++++++++++++++++-----
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs
index 29e4592f..70e35668 100644
--- a/src/api/client_server/media.rs
+++ b/src/api/client_server/media.rs
@@ -166,7 +166,13 @@ pub async fn get_content_route(
 file,
 content_disposition,
 content_type,
- } = get_content(&body.server_name, body.media_id.clone(), body.allow_remote).await?;
+ } = get_content(
+ &body.server_name,
+ body.media_id.clone(),
+ body.allow_remote,
+ false,
+ )
+ .await?;
 
 Ok(media::get_content::v3::Response {
 file,
@@ -182,19 +188,23 @@ pub async fn get_content_route(
 pub async fn get_content_auth_route(
 body: Ruma<get_content::v1::Request>,
) -> Result<get_content::v1::Response> {
- get_content(&body.server_name, body.media_id.clone(), true).await
+ get_content(&body.server_name, body.media_id.clone(), true, true).await
}
 
async fn get_content(
 server_name: &ServerName,
 media_id: String,
 allow_remote: bool,
+ authenticated: bool,
) -> Result<get_content::v1::Response> {
 if let Ok(Some(FileMeta {
 content_disposition,
 content_type,
 file,
- })) = services().media.get(server_name, &media_id).await
+ })) = services()
+ .media
+ .get(server_name, &media_id, authenticated)
+ .await
 {
 Ok(get_content::v1::Response {
 file,
@@ -231,6 +241,7 @@ pub async fn get_content_as_filename_route(
 body.media_id.clone(),
 body.filename.clone(),
 body.allow_remote,
+ false,
 )
 .await?;
 
@@ -253,6 +264,7 @@ pub async fn get_content_as_filename_auth_route(
 body.media_id.clone(),
 body.filename.clone(),
 true,
+ true,
 )
 .await
}
 
@@ -262,10 +274,14 @@ async fn get_content_as_filename(
 media_id: String,
 filename: String,
 allow_remote: bool,
+ authenticated: bool,
) -> Result<get_content_as_filename::v1::Response> {
 if let Ok(Some(FileMeta {
 file,
content_type, .. - })) = services().media.get(server_name, &media_id).await + })) = services() + .media + .get(server_name, &media_id, authenticated) + .await { Ok(get_content_as_filename::v1::Response { file, @@ -311,6 +327,7 @@ pub async fn get_content_thumbnail_route( body.method.clone(), body.animated, body.allow_remote, + false, ) .await?; @@ -336,10 +353,12 @@ pub async fn get_content_thumbnail_auth_route( body.method.clone(), body.animated, true, + true, ) .await } +#[allow(clippy::too_many_arguments)] async fn get_content_thumbnail( server_name: &ServerName, media_id: String, @@ -348,6 +367,7 @@ async fn get_content_thumbnail( method: Option, animated: Option, allow_remote: bool, + authenticated: bool, ) -> Result { if let Some(FileMeta { file, @@ -364,6 +384,7 @@ async fn get_content_thumbnail( height .try_into() .map_err(|_| Error::BadRequest(ErrorKind::InvalidParam, "Height is invalid."))?, + authenticated, ) .await? { @@ -372,7 +393,7 @@ async fn get_content_thumbnail( content_type, content_disposition: Some(content_disposition), }) - } else if server_name != services().globals.server_name() && allow_remote { + } else if server_name != services().globals.server_name() && allow_remote && authenticated { let thumbnail_response = match services() .sending .send_federation_request( diff --git a/src/api/server_server.rs b/src/api/server_server.rs index 01ec47d3..3f780ebd 100644 --- a/src/api/server_server.rs +++ b/src/api/server_server.rs @@ -2227,7 +2227,7 @@ pub async fn get_content_route( file, }) = services() .media - .get(services().globals.server_name(), &body.media_id) + .get(services().globals.server_name(), &body.media_id, true) .await? { Ok(get_content::v1::Response::new( @@ -2264,6 +2264,7 @@ pub async fn get_content_thumbnail_route( body.height .try_into() .map_err(|_| Error::BadRequest(ErrorKind::InvalidParam, "Width is invalid."))?, + true, ) .await? 
else { diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 7fe21cc3..81d66210 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -87,14 +87,23 @@ impl Service { } /// Fetches a local file and it's metadata - pub async fn get(&self, servername: &ServerName, media_id: &str) -> Result> { + pub async fn get( + &self, + servername: &ServerName, + media_id: &str, + authenticated: bool, + ) -> Result> { let DbFileMeta { sha256_digest, filename, content_type, - unauthenticated_access_permitted: _, + unauthenticated_access_permitted, } = self.db.search_file_metadata(servername, media_id)?; + if !(authenticated || unauthenticated_access_permitted) { + return Ok(None); + } + let file = get_file(&hex::encode(sha256_digest)).await?; Ok(Some(FileMeta { @@ -133,17 +142,22 @@ impl Service { media_id: &str, width: u32, height: u32, + authenticated: bool, ) -> Result> { if let Some((width, height, crop)) = self.thumbnail_properties(width, height) { if let Ok(DbFileMeta { sha256_digest, filename, content_type, - unauthenticated_access_permitted: _, + unauthenticated_access_permitted, }) = self .db .search_thumbnail_metadata(servername, media_id, width, height) { + if !(authenticated || unauthenticated_access_permitted) { + return Ok(None); + } + // Using saved thumbnail let file = get_file(&hex::encode(sha256_digest)).await?; @@ -152,13 +166,19 @@ impl Service { content_type, file, })) + } else if !authenticated { + return Ok(None); } else if let Ok(DbFileMeta { sha256_digest, filename, content_type, - unauthenticated_access_permitted: _, + unauthenticated_access_permitted, }) = self.db.search_file_metadata(servername, media_id) { + if !(authenticated || unauthenticated_access_permitted) { + return Ok(None); + } + let content_disposition = content_disposition(filename.clone(), &content_type); // Generate a thumbnail let file = get_file(&hex::encode(sha256_digest)).await?; @@ -252,12 +272,16 @@ impl Service { sha256_digest, filename, content_type, - unauthenticated_access_permitted: _, + unauthenticated_access_permitted, }) = self.db.search_file_metadata(servername, media_id) else { return Ok(None); }; + if !(authenticated || unauthenticated_access_permitted) { + return Ok(None); + } + let file = get_file(&hex::encode(sha256_digest)).await?; Ok(Some(FileMeta { From 19d0ea408cc955a6a04dcca3dbc7733fcdaffbef Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Sun, 23 Mar 2025 17:23:57 +0000 Subject: [PATCH 05/15] feat(media): deep hashed directory structure --- docs/configuration.md | 22 +++++++++- src/config/mod.rs | 84 +++++++++++++++++++++++++++++++++++--- src/main.rs | 54 ++++++++++++++++-------- src/service/globals/mod.rs | 30 +++++++++++--- src/service/media/mod.rs | 18 ++++++-- 5 files changed, 173 insertions(+), 35 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index ffbfa512..3323fb64 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -64,7 +64,7 @@ The `global` section contains the following fields: ### Media The `media` table is used to configure how media is stored and where. Currently, there is only one available backend, that being `filesystem`. The backend can be set using the `backend` field. 
Example: -``` +```toml [global.media] backend = "filesystem" # the default backend ``` @@ -73,12 +73,30 @@ backend = "filesystem" # the default backend The filesystem backend has the following fields: - `path`: The base directory where all the media files will be stored (defaults to `${database_path}/media`) +- `directory_structure`: This is a table, used to configure how files are to be distributed within + the media directory. It has the following fields: + - `depth`: The number sub-directories that should be created for files (default: `2`) + - `length`: How long the name of these sub-directories should be (default: `2`) + For example, a file may regularly have the name `98ea6e4f216f2fb4b69fff9b3a44842c38686ca685f3f55dc48c5d3fb1107be4` + (The SHA256 digest of the file's content). If `depth` and `length` were both set to `2`, this file would be stored + at `${path}/98/ea/6e4f216f2fb4b69fff9b3a44842c38686ca685f3f55dc48c5d3fb1107be4`. If you want to instead have all + media files in the base directory with no sub-directories, just set `directory_structure` to be empty, as follows: + ```toml + [global.media] + backend = "filesystem" + + [global.media.directory_structure] + ``` ##### Example: -``` +```toml [global.media] backend = "filesystem" path = "/srv/matrix-media" + +[global.media.directory_structure] +depth = 4 +length = 2 ``` ### TLS diff --git a/src/config/mod.rs b/src/config/mod.rs index 370dcfec..46dcf7e0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2,6 +2,7 @@ use std::{ collections::BTreeMap, fmt, net::{IpAddr, Ipv4Addr}, + num::NonZeroU8, path::PathBuf, }; @@ -10,10 +11,13 @@ use serde::{de::IgnoredAny, Deserialize}; use tracing::warn; use url::Url; -mod proxy; +use crate::Error; +mod proxy; use self::proxy::ProxyConfig; +const SHA256_HEX_LENGTH: u8 = 64; + #[derive(Deserialize)] pub struct IncompleteConfig { #[serde(default = "default_address")] @@ -218,7 +222,10 @@ impl From for Config { }; let media = match media { - IncompleteMediaConfig::FileSystem { path } => MediaConfig::FileSystem { + IncompleteMediaConfig::FileSystem { + path, + directory_structure, + } => MediaConfig::FileSystem { path: path.unwrap_or_else(|| { // We do this as we don't know if the path has a trailing slash, or even if the // path separator is a forward or backward slash @@ -229,6 +236,7 @@ impl From for Config { .into_string() .expect("Both inputs are valid UTF-8") }), + directory_structure, }, }; @@ -309,21 +317,85 @@ pub struct WellKnownConfig { pub server: OwnedServerName, } -#[derive(Clone, Debug, Deserialize)] +#[derive(Deserialize)] #[serde(tag = "backend", rename_all = "lowercase")] pub enum IncompleteMediaConfig { - FileSystem { path: Option }, + FileSystem { + path: Option, + #[serde(default)] + directory_structure: DirectoryStructure, + }, } impl Default for IncompleteMediaConfig { fn default() -> Self { - Self::FileSystem { path: None } + Self::FileSystem { + path: None, + directory_structure: DirectoryStructure::default(), + } } } #[derive(Debug, Clone)] pub enum MediaConfig { - FileSystem { path: String }, + FileSystem { + path: String, + directory_structure: DirectoryStructure, + }, +} + +#[derive(Debug, Clone, Deserialize)] +// See https://github.com/serde-rs/serde/issues/642#issuecomment-525432907 +#[serde(try_from = "ShadowDirectoryStructure", untagged)] +pub enum DirectoryStructure { + // We do this enum instead of Option, so that we can have the structure be + // deep by default, while still providing a away for it to be flat (by creating an empty table) + // + // 
e.g.: + // ```toml + // [global.media.directory_structure] + // ``` + Flat, + Deep { length: NonZeroU8, depth: NonZeroU8 }, +} + +impl Default for DirectoryStructure { + fn default() -> Self { + Self::Deep { + length: NonZeroU8::new(2).expect("2 is not 0"), + depth: NonZeroU8::new(2).expect("2 is not 0"), + } + } +} + +#[derive(Deserialize)] +#[serde(untagged)] +enum ShadowDirectoryStructure { + Flat {}, + Deep { length: NonZeroU8, depth: NonZeroU8 }, +} + +impl TryFrom for DirectoryStructure { + type Error = Error; + + fn try_from(value: ShadowDirectoryStructure) -> Result { + match value { + ShadowDirectoryStructure::Flat {} => Ok(Self::Flat), + ShadowDirectoryStructure::Deep { length, depth } => { + if length + .get() + .checked_mul(depth.get()) + .map(|product| product < SHA256_HEX_LENGTH) + // If an overflow occurs, it definitely isn't less than SHA256_HEX_LENGTH + .unwrap_or(false) + { + Ok(Self::Deep { length, depth }) + } else { + Err(Error::bad_config("The media directory structure depth multiplied by the depth is equal to or greater than a sha256 hex hash, please reduce at least one of the two so that their product is less than 64")) + } + } + } + } } const DEPRECATED_KEYS: &[&str] = &[ diff --git a/src/main.rs b/src/main.rs index 96aa2714..01af9ad2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -47,33 +47,53 @@ static GLOBAL: Jemalloc = Jemalloc; static SUB_TABLES: [&str; 3] = ["well_known", "tls", "media"]; // Not doing `proxy` cause setting that with env vars would be a pain +// Yeah, I know it's terrible, but since it seems the container users dont want syntax like A[B][C]="...", +// this is what we have to deal with. Also see: https://github.com/SergioBenitez/Figment/issues/12#issuecomment-801449465 +static SUB_SUB_TABLES: [&str; 1] = ["directory_structure"]; + #[tokio::main] async fn main() { clap::parse(); // Initialize config - let raw_config = - Figment::new() - .merge( - Toml::file(Env::var("CONDUIT_CONFIG").expect( + let raw_config = Figment::new() + .merge( + Toml::file( + Env::var("CONDUIT_CONFIG").expect( "The CONDUIT_CONFIG env var needs to be set. Example: /etc/conduit.toml", - )) - .nested(), + ), ) - .merge(Env::prefixed("CONDUIT_").global().map(|k| { - let mut key: Uncased = k.into(); + .nested(), + ) + .merge(Env::prefixed("CONDUIT_").global().map(|k| { + let mut key: Uncased = k.into(); - for table in SUB_TABLES { - if k.starts_with(&(table.to_owned() + "_")) { - key = Uncased::from( - table.to_owned() + "." + k[table.len() + 1..k.len()].as_str(), - ); - break; + 'outer: for table in SUB_TABLES { + if k.starts_with(&(table.to_owned() + "_")) { + for sub_table in SUB_SUB_TABLES { + if k.starts_with(&(table.to_owned() + "_" + sub_table + "_")) { + key = Uncased::from( + table.to_owned() + + "." + + sub_table + + "." + + k[table.len() + 1 + sub_table.len() + 1..k.len()].as_str(), + ); + + break 'outer; + } } - } - key - })); + key = Uncased::from( + table.to_owned() + "." 
+ k[table.len() + 1..k.len()].as_str(), + ); + + break; + } + } + + key + })); let config = match raw_config.extract::() { Ok(s) => s, diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index d7cf19b1..ac77afe9 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -8,7 +8,7 @@ use ruma::{ use crate::api::server_server::DestinationResponse; use crate::{ - config::{MediaConfig, TurnConfig}, + config::{DirectoryStructure, MediaConfig, TurnConfig}, services, Config, Error, Result, }; use futures_util::FutureExt; @@ -230,7 +230,7 @@ impl Service { // Remove this exception once other media backends are added #[allow(irrefutable_let_patterns)] - if let MediaConfig::FileSystem { path } = &s.config.media { + if let MediaConfig::FileSystem { path, .. } = &s.config.media { fs::create_dir_all(path)?; } @@ -482,14 +482,32 @@ impl Service { self.db.bump_database_version(new_version) } - pub fn get_media_path(&self, media_directory: &str, sha256_hex: &str) -> PathBuf { + pub fn get_media_path( + &self, + media_directory: &str, + directory_structure: &DirectoryStructure, + sha256_hex: &str, + ) -> Result { let mut r = PathBuf::new(); r.push(media_directory); - //TODO: Directory distribution - r.push(sha256_hex); + if let DirectoryStructure::Deep { length, depth } = directory_structure { + let mut filename = sha256_hex; + for _ in 0..depth.get() { + let (current_path, next) = filename.split_at(length.get().into()); + filename = next; + r.push(current_path); + } - r + // Create all directories leading up to file + fs::create_dir_all(&r).inspect_err(|e| error!("Error creating leading directories for media with sha256 hash of {sha256_hex}: {e}"))?; + + r.push(filename); + } else { + r.push(sha256_hex); + } + + Ok(r) } pub fn shutdown(&self) { diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 81d66210..447ed566 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -298,8 +298,13 @@ impl Service { /// Note: this function does NOT set the metadata related to the file pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> { match &services().globals.config.media { - MediaConfig::FileSystem { path } => { - let path = services().globals.get_media_path(path, sha256_hex); + MediaConfig::FileSystem { + path, + directory_structure, + } => { + let path = services() + .globals + .get_media_path(path, directory_structure, sha256_hex)?; let mut f = File::create(path).await?; f.write_all(file).await?; @@ -312,8 +317,13 @@ pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> { /// Fetches the file from the configured media backend async fn get_file(sha256_hex: &str) -> Result> { Ok(match &services().globals.config.media { - MediaConfig::FileSystem { path } => { - let path = services().globals.get_media_path(path, sha256_hex); + MediaConfig::FileSystem { + path, + directory_structure, + } => { + let path = services() + .globals + .get_media_path(path, directory_structure, sha256_hex)?; let mut file = Vec::new(); File::open(path).await?.read_to_end(&mut file).await?; From 3171b779c6ed0ddc511b156cc56a43875f92f75c Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Sat, 29 Mar 2025 13:20:55 +0000 Subject: [PATCH 06/15] feat(media): save user id of uploader --- src/api/client_server/media.rs | 2 ++ src/database/key_value/media.rs | 24 ++++++++++++++++++++++-- src/database/mod.rs | 6 ++++++ src/service/media/data.rs | 4 +++- src/service/media/mod.rs | 4 +++- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git 
a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 70e35668..93975475 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -71,6 +71,7 @@ pub async fn create_content_route( filename.as_deref(), content_type.as_deref(), &file, + body.sender_user.as_deref(), ) .await?; @@ -148,6 +149,7 @@ pub async fn get_remote_content( .and_then(|cd| cd.filename.as_deref()), content_response.content_type.as_deref(), &content_response.file, + None, ) .await?; diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 26232b8b..8ab9046b 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,4 +1,4 @@ -use ruma::{api::client::error::ErrorKind, ServerName}; +use ruma::{api::client::error::ErrorKind, ServerName, UserId}; use sha2::{digest::Output, Sha256}; use tracing::error; @@ -17,6 +17,7 @@ impl service::media::Data for KeyValueDatabase { media_id: &str, filename: Option<&str>, content_type: Option<&str>, + user_id: Option<&UserId>, ) -> Result<()> { let metadata = FilehashMetadata::new(file_size); @@ -39,7 +40,26 @@ impl service::media::Data for KeyValueDatabase { value.push(0xff); value.extend_from_slice(content_type.map(|f| f.as_bytes()).unwrap_or_default()); - self.servernamemediaid_metadata.insert(&key, &value) + self.servernamemediaid_metadata.insert(&key, &value)?; + + if let Some(user_id) = user_id { + let mut key = servername.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(user_id.localpart().as_bytes()); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + self.servername_userlocalpart_mediaid.insert(&key, &[])?; + + let mut key = servername.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + self.servernamemediaid_userlocalpart + .insert(&key, user_id.localpart().as_bytes())?; + } + + Ok(()) } fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result { diff --git a/src/database/mod.rs b/src/database/mod.rs index 29520827..925d636c 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -179,6 +179,8 @@ pub struct KeyValueDatabase { pub(super) servernamemediaid_metadata: Arc, // Servername + MediaID -> content sha256 + Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints pub(super) filehash_servername_mediaid: Arc, // sha256 of content + Servername + MediaID, used to delete dangling references to filehashes from servernamemediaid pub(super) filehash_metadata: Arc, // sha256 of content -> file size + creation time + last access time + pub(super) servername_userlocalpart_mediaid: Arc, // Servername + User Localpart + MediaID + pub(super) servernamemediaid_userlocalpart: Arc, // Servername + MediaID -> User Localpart, used to remove keys from above when files are deleted by unrelated means pub(super) thumbnailid_metadata: Arc, // ThumbnailId = Servername + MediaID + width + height -> Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints pub(super) filehash_thumbnailid: Arc, // sha256 of content + "ThumbnailId", as defined above. 
Used to dangling references to filehashes from thumbnailIds //pub key_backups: key_backups::KeyBackups, @@ -387,6 +389,10 @@ impl KeyValueDatabase { servernamemediaid_metadata: builder.open_tree("servernamemediaid_metadata")?, filehash_servername_mediaid: builder.open_tree("filehash_servername_mediaid")?, filehash_metadata: builder.open_tree("filehash_metadata")?, + servername_userlocalpart_mediaid: builder + .open_tree("servername_userlocalpart_mediaid")?, + servernamemediaid_userlocalpart: builder + .open_tree("servernamemediaid_userlocalpart")?, thumbnailid_metadata: builder.open_tree("thumbnailid_metadata")?, filehash_thumbnailid: builder.open_tree("filehash_thumbnailid")?, backupid_algorithm: builder.open_tree("backupid_algorithm")?, diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 254cee83..2301ebbc 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -1,4 +1,4 @@ -use ruma::ServerName; +use ruma::{ServerName, UserId}; use sha2::{digest::Output, Sha256}; use crate::Result; @@ -6,6 +6,7 @@ use crate::Result; use super::DbFileMeta; pub trait Data: Send + Sync { + #[allow(clippy::too_many_arguments)] fn create_file_metadata( &self, sha256_digest: Output, @@ -14,6 +15,7 @@ pub trait Data: Send + Sync { media_id: &str, filename: Option<&str>, content_type: Option<&str>, + user_id: Option<&UserId>, ) -> Result<()>; fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result; diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 447ed566..b325f507 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -5,7 +5,7 @@ pub use data::Data; use ruma::{ api::client::{error::ErrorKind, media::is_safe_inline_content_type}, http_headers::{ContentDisposition, ContentDispositionType}, - ServerName, + ServerName, UserId, }; use sha2::{digest::Output, Digest, Sha256}; @@ -43,6 +43,7 @@ impl Service { filename: Option<&str>, content_type: Option<&str>, file: &[u8], + user_id: Option<&UserId>, ) -> Result<()> { let (sha256_digest, sha256_hex) = generate_digests(file); @@ -53,6 +54,7 @@ impl Service { media_id, filename, content_type, + user_id, )?; create_file(&sha256_hex, file).await From bdf2014cf45da2595604650afa112bba390460ff Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Mon, 28 Apr 2025 01:43:48 +0100 Subject: [PATCH 07/15] chore: bump nix flake --- .gitlab-ci.yml | 9 +- flake.lock | 151 ++++++++++++--------- flake.nix | 19 ++- nix/pkgs/default/cross-compilation-env.nix | 23 +--- nix/pkgs/default/default.nix | 1 + nix/pkgs/oci-image/default.nix | 13 +- 6 files changed, 122 insertions(+), 94 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3346795f..1bef6143 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -84,10 +84,10 @@ artifacts: - ./bin/nix-build-and-cache .#static-x86_64-unknown-linux-musl - cp result/bin/conduit x86_64-unknown-linux-musl - - mkdir -p target/release - - cp result/bin/conduit target/release - - direnv exec . cargo deb --no-build - - mv target/debian/*.deb x86_64-unknown-linux-musl.deb + - mkdir -p target/x86_64-unknown-linux-musl/release + - cp result/bin/conduit target/x86_64-unknown-linux-musl/release + - direnv exec . cargo deb --no-build --target x86_64-unknown-linux-musl + - mv target/x86_64-unknown-linux-musl/debian/*.deb x86_64-unknown-linux-musl.deb # Since the OCI image package is based on the binary package, this has the # fun side effect of uploading the normal binary too. 
Conduit users who are @@ -105,6 +105,7 @@ artifacts: - mkdir -p target/aarch64-unknown-linux-musl/release - cp result/bin/conduit target/aarch64-unknown-linux-musl/release + # binary stripping requires target-specific binary (`strip`) - direnv exec . cargo deb --no-strip --no-build --target aarch64-unknown-linux-musl - mv target/aarch64-unknown-linux-musl/debian/*.deb aarch64-unknown-linux-musl.deb diff --git a/flake.lock b/flake.lock index 1983d800..03c94c6d 100644 --- a/flake.lock +++ b/flake.lock @@ -4,16 +4,17 @@ "inputs": { "crane": "crane", "flake-compat": "flake-compat", - "flake-utils": "flake-utils", + "flake-parts": "flake-parts", + "nix-github-actions": "nix-github-actions", "nixpkgs": "nixpkgs", "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1707922053, - "narHash": "sha256-wSZjK+rOXn+UQiP1NbdNn5/UW6UcBxjvlqr2wh++MbM=", + "lastModified": 1738524606, + "narHash": "sha256-hPYEJ4juK3ph7kbjbvv7PlU1D9pAkkhl+pwx8fZY53U=", "owner": "zhaofengli", "repo": "attic", - "rev": "6eabc3f02fae3683bffab483e614bebfcd476b21", + "rev": "ff8a897d1f4408ebbf4d45fa9049c06b3e1e3f4e", "type": "github" }, "original": { @@ -31,11 +32,11 @@ ] }, "locked": { - "lastModified": 1702918879, - "narHash": "sha256-tWJqzajIvYcaRWxn+cLUB9L9Pv4dQ3Bfit/YjU5ze3g=", + "lastModified": 1722960479, + "narHash": "sha256-NhCkJJQhD5GUib8zN9JrmYGMwt4lCRp6ZVNzIiYCl0Y=", "owner": "ipetkov", "repo": "crane", - "rev": "7195c00c272fdd92fc74e7d5a0a2844b9fadb2fb", + "rev": "4c6c77920b8d44cd6660c1621dea6b3fc4b4c4f4", "type": "github" }, "original": { @@ -45,23 +46,18 @@ } }, "crane_2": { - "inputs": { - "nixpkgs": [ - "nixpkgs" - ] - }, "locked": { - "lastModified": 1713721181, - "narHash": "sha256-Vz1KRVTzU3ClBfyhOj8gOehZk21q58T1YsXC30V23PU=", + "lastModified": 1741481578, + "narHash": "sha256-JBTSyJFQdO3V8cgcL08VaBUByEU6P5kXbTJN6R0PFQo=", "owner": "ipetkov", "repo": "crane", - "rev": "55f4939ac59ff8f89c6a4029730a2d49ea09105f", + "rev": "bb1c9567c43e4434f54e9481eb4b8e8e0d50f0b5", "type": "github" }, "original": { "owner": "ipetkov", - "ref": "master", "repo": "crane", + "rev": "bb1c9567c43e4434f54e9481eb4b8e8e0d50f0b5", "type": "github" } }, @@ -73,11 +69,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1709619709, - "narHash": "sha256-l6EPVJfwfelWST7qWQeP6t/TDK3HHv5uUB1b2vw4mOQ=", + "lastModified": 1745735608, + "narHash": "sha256-L0jzm815XBFfF2wCFmR+M1CF+beIEFj6SxlqVKF59Ec=", "owner": "nix-community", "repo": "fenix", - "rev": "c8943ea9e98d41325ff57d4ec14736d330b321b2", + "rev": "c39a78eba6ed2a022cc3218db90d485077101496", "type": "github" }, "original": { @@ -87,22 +83,6 @@ } }, "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1673956053, - "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "flake-compat_2": { "flake": false, "locked": { "lastModified": 1696426674, @@ -118,31 +98,53 @@ "type": "github" } }, - "flake-utils": { + "flake-compat_2": { + "flake": false, "locked": { - "lastModified": 1667395993, - "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", + "lastModified": 1733328505, + "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": 
"ff81ac966bb2cae68946d5ed5fc4994f96d0ffec", "type": "github" }, "original": { - "owner": "numtide", - "repo": "flake-utils", + "owner": "edolstra", + "repo": "flake-compat", "type": "github" } }, - "flake-utils_2": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": [ + "attic", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1722555600, + "narHash": "sha256-XOQkdLafnb/p9ij77byFQjDf5m5QYl9b2REiVClC+x4=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "8471fe90ad337a8074e957b69ca4d0089218391d", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "flake-utils": { "inputs": { "systems": "systems" }, "locked": { - "lastModified": 1709126324, - "narHash": "sha256-q6EQdSeUZOG26WelxqkmR7kArjgWCdw5sfJVHPH/7j8=", + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", "owner": "numtide", "repo": "flake-utils", - "rev": "d465f4819400de7c8d874d50b982301f28a84605", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", "type": "github" }, "original": { @@ -153,11 +155,11 @@ }, "nix-filter": { "locked": { - "lastModified": 1705332318, - "narHash": "sha256-kcw1yFeJe9N4PjQji9ZeX47jg0p9A0DuU4djKvg1a7I=", + "lastModified": 1731533336, + "narHash": "sha256-oRam5PS1vcrr5UPgALW0eo1m/5/pls27Z/pabHNy2Ms=", "owner": "numtide", "repo": "nix-filter", - "rev": "3449dc925982ad46246cfc36469baf66e1b64f17", + "rev": "f7653272fd234696ae94229839a99b73c9ab7de0", "type": "github" }, "original": { @@ -166,13 +168,34 @@ "type": "github" } }, + "nix-github-actions": { + "inputs": { + "nixpkgs": [ + "attic", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1729742964, + "narHash": "sha256-B4mzTcQ0FZHdpeWcpDYPERtyjJd/NIuaQ9+BV1h+MpA=", + "owner": "nix-community", + "repo": "nix-github-actions", + "rev": "e04df33f62cdcf93d73e9a04142464753a16db67", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nix-github-actions", + "type": "github" + } + }, "nixpkgs": { "locked": { - "lastModified": 1702539185, - "narHash": "sha256-KnIRG5NMdLIpEkZTnN5zovNYc0hhXjAgv6pfd5Z4c7U=", + "lastModified": 1726042813, + "narHash": "sha256-LnNKCCxnwgF+575y0pxUdlGZBO/ru1CtGHIqQVfvjlA=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "aa9d4729cbc99dabacb50e3994dcefb3ea0f7447", + "rev": "159be5db480d1df880a0135ca0bfed84c2f88353", "type": "github" }, "original": { @@ -184,27 +207,27 @@ }, "nixpkgs-stable": { "locked": { - "lastModified": 1702780907, - "narHash": "sha256-blbrBBXjjZt6OKTcYX1jpe9SRof2P9ZYWPzq22tzXAA=", + "lastModified": 1724316499, + "narHash": "sha256-Qb9MhKBUTCfWg/wqqaxt89Xfi6qTD3XpTzQ9eXi3JmE=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "1e2e384c5b7c50dbf8e9c441a9e58d85f408b01f", + "rev": "797f7dc49e0bc7fab4b57c021cdf68f595e47841", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-23.11", + "ref": "nixos-24.05", "repo": "nixpkgs", "type": "github" } }, "nixpkgs_2": { "locked": { - "lastModified": 1709479366, - "narHash": "sha256-n6F0n8UV6lnTZbYPl1A9q1BS0p4hduAv1mGAP17CVd0=", + "lastModified": 1745526057, + "narHash": "sha256-ITSpPDwvLBZBnPRS2bUcHY3gZSwis/uTe255QgMtTLA=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b8697e57f10292a6165a20f03d2f42920dfaf973", + "rev": "f771eb401a46846c1aebd20552521b233dd7e18b", "type": "github" }, "original": { @@ -220,7 +243,7 @@ "crane": "crane_2", "fenix": "fenix", "flake-compat": "flake-compat_2", - "flake-utils": "flake-utils_2", + "flake-utils": "flake-utils", "nix-filter": "nix-filter", "nixpkgs": "nixpkgs_2" } @@ -228,11 
+251,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1709571018, - "narHash": "sha256-ISFrxHxE0J5g7lDAscbK88hwaT5uewvWoma9TlFmRzM=", + "lastModified": 1745694049, + "narHash": "sha256-fxvRYH/tS7hGQeg9zCVh5RBcSWT+JGJet7RA8Ss+rC0=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "9f14343f9ee24f53f17492c5f9b653427e2ad15e", + "rev": "d8887c0758bbd2d5f752d5bd405d4491e90e7ed6", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index df05bf86..6ab922bb 100644 --- a/flake.nix +++ b/flake.nix @@ -12,10 +12,10 @@ url = "github:nix-community/fenix"; inputs.nixpkgs.follows = "nixpkgs"; }; - crane = { - url = "github:ipetkov/crane?ref=master"; - inputs.nixpkgs.follows = "nixpkgs"; - }; + # Pinned because crane's own automatic cross compilation configuration that they + # introduce in the next commit attempts to link the musl targets against glibc + # for some reason. Unpin once this is fixed. + crane.url = "github:ipetkov/crane?rev=bb1c9567c43e4434f54e9481eb4b8e8e0d50f0b5"; attic.url = "github:zhaofengli/attic?ref=main"; }; @@ -24,7 +24,7 @@ # Keep sorted mkScope = pkgs: pkgs.lib.makeScope pkgs.newScope (self: { craneLib = - (inputs.crane.mkLib pkgs).overrideToolchain self.toolchain; + (inputs.crane.mkLib pkgs).overrideToolchain (_: self.toolchain); default = self.callPackage ./nix/pkgs/default {}; @@ -65,7 +65,14 @@ in inputs.flake-utils.lib.eachDefaultSystem (system: let - pkgs = inputs.nixpkgs.legacyPackages.${system}; + pkgs = (import inputs.nixpkgs { + inherit system; + + # libolm is deprecated, but we only need it for complement + config.permittedInsecurePackages = [ + "olm-3.2.16" + ]; + }); in { packages = { diff --git a/nix/pkgs/default/cross-compilation-env.nix b/nix/pkgs/default/cross-compilation-env.nix index fac85e02..da33a8dd 100644 --- a/nix/pkgs/default/cross-compilation-env.nix +++ b/nix/pkgs/default/cross-compilation-env.nix @@ -22,23 +22,10 @@ lib.optionalAttrs stdenv.hostPlatform.isStatic { [ "-C" "relocation-model=static" ] ++ lib.optionals (stdenv.buildPlatform.config != stdenv.hostPlatform.config) - [ "-l" "c" ] - ++ lib.optionals - # This check has to match the one [here][0]. We only need to set - # these flags when using a different linker. Don't ask me why, though, - # because I don't know. All I know is it breaks otherwise. - # - # [0]: https://github.com/NixOS/nixpkgs/blob/5cdb38bb16c6d0a38779db14fcc766bc1b2394d6/pkgs/build-support/rust/lib/default.nix#L37-L40 - ( - # Nixpkgs doesn't check for x86_64 here but we do, because I - # observed a failure building statically for x86_64 without - # including it here. Linkers are weird. 
- (stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isx86_64) - && stdenv.hostPlatform.isStatic - && !stdenv.isDarwin - && !stdenv.cc.bintools.isLLVM - ) [ + "-l" + "c" + "-l" "stdc++" "-L" @@ -80,7 +67,7 @@ lib.optionalAttrs stdenv.hostPlatform.isStatic { { "CC_${cargoEnvVarTarget}" = envVars.ccForHost; "CXX_${cargoEnvVarTarget}" = envVars.cxxForHost; - "CARGO_TARGET_${cargoEnvVarTarget}_LINKER" = envVars.linkerForHost; + "CARGO_TARGET_${cargoEnvVarTarget}_LINKER" = envVars.ccForHost; CARGO_BUILD_TARGET = rustcTarget; } ) @@ -92,7 +79,7 @@ lib.optionalAttrs stdenv.hostPlatform.isStatic { { "CC_${cargoEnvVarTarget}" = envVars.ccForBuild; "CXX_${cargoEnvVarTarget}" = envVars.cxxForBuild; - "CARGO_TARGET_${cargoEnvVarTarget}_LINKER" = envVars.linkerForBuild; + "CARGO_TARGET_${cargoEnvVarTarget}_LINKER" = envVars.ccForBuild; HOST_CC = "${pkgsBuildHost.stdenv.cc}/bin/cc"; HOST_CXX = "${pkgsBuildHost.stdenv.cc}/bin/c++"; } diff --git a/nix/pkgs/default/default.nix b/nix/pkgs/default/default.nix index 92b953a1..c54b6a70 100644 --- a/nix/pkgs/default/default.nix +++ b/nix/pkgs/default/default.nix @@ -18,6 +18,7 @@ let let rocksdb' = rocksdb.override { enableJemalloc = builtins.elem "jemalloc" features; + enableLiburing = false; }; in { diff --git a/nix/pkgs/oci-image/default.nix b/nix/pkgs/oci-image/default.nix index 8b359ce4..f27c45c0 100644 --- a/nix/pkgs/oci-image/default.nix +++ b/nix/pkgs/oci-image/default.nix @@ -2,9 +2,18 @@ { default , dockerTools , lib -, tini +, pkgs }: - +let + # See https://github.com/krallin/tini/pull/223 + tini = pkgs.tini.overrideAttrs { + patches = [ (pkgs.fetchpatch { + url = "https://patch-diff.githubusercontent.com/raw/krallin/tini/pull/223.patch"; + hash = "sha256-i6xcf+qpjD+7ZQY3ueiDaxO4+UA2LutLCZLNmT+ji1s="; + }) + ]; + }; +in dockerTools.buildImage { name = default.pname; tag = "next"; From 33b02c868d182101c99891ece39d43b4244f5b1b Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Sun, 27 Apr 2025 17:08:50 +0100 Subject: [PATCH 08/15] chore(rust): upgrade to 1.83.0 This is needed for std::io::ErrorKind::DirectoryNotEmpty --- Cargo.toml | 2 +- flake.nix | 2 +- rust-toolchain.toml | 2 +- src/database/key_value/sending.rs | 4 ++-- src/utils/mod.rs | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f120b014..d352ce7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ repository = "https://gitlab.com/famedly/conduit" version = "0.10.0-alpha" # See also `rust-toolchain.toml` -rust-version = "1.81.0" +rust-version = "1.83.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/flake.nix b/flake.nix index 6ab922bb..0131b89e 100644 --- a/flake.nix +++ b/flake.nix @@ -59,7 +59,7 @@ file = ./rust-toolchain.toml; # See also `rust-toolchain.toml` - sha256 = "sha256-VZZnlyP69+Y3crrLHQyJirqlHrTtGTsyiSnZB8jEvVo="; + sha256 = "sha256-s1RPtyvDGJaX/BisLT+ifVfuhDT1nZkZ1NcK8sbwELM="; }; }); in diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 465ffdee..c5259d2a 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -9,7 +9,7 @@ # If you're having trouble making the relevant changes, bug a maintainer. 
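For context on the commit message above: `std::io::ErrorKind::DirectoryNotEmpty` was stabilized in Rust 1.83.0, which is what forces this toolchain bump. A minimal sketch of the kind of match that depends on it; the function name is illustrative and not taken from this patch:

```rust
use std::{fs, io::ErrorKind};

/// Try to remove a directory, treating "still has entries" as a normal
/// outcome rather than an error. `ErrorKind::DirectoryNotEmpty` is only
/// available on stable Rust from 1.83.0 onwards.
fn remove_dir_if_empty(path: &str) -> std::io::Result<bool> {
    match fs::remove_dir(path) {
        Ok(()) => Ok(true),
        Err(e) if e.kind() == ErrorKind::DirectoryNotEmpty => Ok(false),
        Err(e) => Err(e),
    }
}
```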
[toolchain] -channel = "1.81.0" +channel = "1.83.0" components = [ # For rust-analyzer "rust-src", diff --git a/src/database/key_value/sending.rs b/src/database/key_value/sending.rs index 3fc3e042..b7064610 100644 --- a/src/database/key_value/sending.rs +++ b/src/database/key_value/sending.rs @@ -89,11 +89,11 @@ impl service::sending::Data for KeyValueDatabase { outgoing_kind: &OutgoingKind, ) -> Box)>> + 'a> { let prefix = outgoing_kind.get_prefix(); - return Box::new( + Box::new( self.servernameevent_data .scan_prefix(prefix) .map(|(k, v)| parse_servercurrentevent(&k, v).map(|(_, ev)| (ev, k))), - ); + ) } fn mark_as_active(&self, events: &[(SendingEventType, Vec)]) -> Result<()> { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 69ec809f..8cab3762 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -134,7 +134,7 @@ pub fn deserialize_from_str< deserializer: D, ) -> Result { struct Visitor, E>(std::marker::PhantomData); - impl<'de, T: FromStr, Err: fmt::Display> serde::de::Visitor<'de> for Visitor { + impl, Err: fmt::Display> serde::de::Visitor<'_> for Visitor { type Value = T; fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { write!(formatter, "a parsable string") @@ -156,7 +156,7 @@ pub fn deserialize_from_str< /// string when passed to a format string. pub struct HtmlEscape<'a>(pub &'a str); -impl<'a> fmt::Display for HtmlEscape<'a> { +impl fmt::Display for HtmlEscape<'_> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { // Because the internet is always right, turns out there's not that many // characters to escape: http://stackoverflow.com/questions/7381974 From d76637048a4bd8c5c85b124163edafabb6e21274 Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Sun, 30 Mar 2025 00:54:09 +0000 Subject: [PATCH 09/15] feat(admin): commands for purging media --- Cargo.lock | 7 + Cargo.toml | 2 + src/database/key_value/media.rs | 441 +++++++++++++++++++++++++++++++- src/service/admin/mod.rs | 414 ++++++++++++++++++++++++------ src/service/media/data.rs | 22 +- src/service/media/mod.rs | 133 +++++++++- 6 files changed, 937 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf5e656d..6764c0c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -503,6 +503,7 @@ dependencies = [ "hickory-resolver", "hmac", "http 1.1.0", + "humantime", "hyper 1.3.1", "hyper-util", "image", @@ -1195,6 +1196,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" + [[package]] name = "hyper" version = "0.14.29" diff --git a/Cargo.toml b/Cargo.toml index d352ce7a..7ee97b4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,6 +131,8 @@ clap = { version = "4.3.0", default-features = false, features = [ "string", "usage", ] } +humantime = "2" + futures-util = { version = "0.3.28", default-features = false } # Used for reading the configuration from conduit.toml & environment variables figment = { version = "0.10.8", features = ["env", "toml"] } diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 8ab9046b..6f835b5b 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,4 +1,6 @@ -use ruma::{api::client::error::ErrorKind, ServerName, UserId}; +use std::{collections::BTreeMap, ops::Range}; + +use 
ruma::{api::client::error::ErrorKind, OwnedServerName, ServerName, UserId}; use sha2::{digest::Output, Sha256}; use tracing::error; @@ -153,6 +155,403 @@ impl service::media::Data for KeyValueDatabase { .map(|_| metadata) .ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found.")) } + + fn purge_and_get_hashes( + &self, + media: &[(OwnedServerName, String)], + force_filehash: bool, + ) -> Vec> { + let mut files = Vec::new(); + + let purge = |mut value: Vec| { + value.truncate(32); + let sha256_digest = value; + + let sha256_hex = hex::encode(&sha256_digest); + + self.purge_filehash(sha256_digest, false)?; + + Ok(sha256_hex) + }; + + for (server_name, media_id) in media { + if force_filehash { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + match self.servernamemediaid_metadata.get(&key) { + Ok(Some(value)) => { + files.push(purge(value)); + } + Ok(None) => (), + Err(e) => { + files.push(Err(e)); + } + } + + key.push(0xff); + for (_, value) in self.thumbnailid_metadata.scan_prefix(key) { + files.push(purge(value)); + } + } else { + match self.purge_mediaid(server_name, media_id, false) { + Ok(f) => { + files.append(&mut f.into_iter().map(Ok).collect()); + } + Err(e) => files.push(Err(e)), + } + } + } + + files + } + + fn purge_and_get_hashes_from_user( + &self, + user_id: &UserId, + force_filehash: bool, + after: Option, + ) -> Vec> { + let mut files = Vec::new(); + let mut prefix = user_id.server_name().as_bytes().to_vec(); + prefix.push(0xff); + prefix.extend_from_slice(user_id.localpart().as_bytes()); + prefix.push(0xff); + + let purge_filehash = |sha256_digest: Vec| { + let sha256_hex = hex::encode(&sha256_digest); + + self.purge_filehash(sha256_digest, false)?; + + Ok(sha256_hex) + }; + + for (k, _) in self.servername_userlocalpart_mediaid.scan_prefix(prefix) { + let metadata = || { + let mut parts = k.rsplit(|&b| b == 0xff); + let media_id_bytes = parts.next().ok_or_else(|| { + Error::bad_database( + "Invalid format for key of servername_userlocalpart_mediaid", + ) + })?; + + let media_id = utils::string_from_bytes(media_id_bytes).map_err(|_| { + Error::bad_database( + "Invalid media_id string in servername_userlocalpart_mediaid", + ) + })?; + + let mut key = user_id.server_name().as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + Ok(( + self.servernamemediaid_metadata.get(&key)?.ok_or_else(|| { + error!( + "Missing metadata for \"mxc://{}/{media_id}\", despite storing it's uploader", + user_id.server_name() + ); + Error::BadDatabase("Missing metadata for media id and server_name") + })?, + media_id, + )) + }; + + let (mut metadata, media_id) = match metadata() { + Ok(v) => v, + Err(e) => { + files.push(Err(e)); + continue; + } + }; + + metadata.truncate(32); + let sha256_digest = metadata; + + if let Some(after) = after { + let metadata = match self + .filehash_metadata + .get(&sha256_digest) + .map(|opt| opt.map(FilehashMetadata::from_vec)) + { + Ok(Some(metadata)) => metadata, + // If the media has already been deleted, we shouldn't treat that as an error + Ok(None) => continue, + Err(e) => { + files.push(Err(e)); + continue; + } + }; + + let creation = match metadata.creation(&sha256_digest) { + Ok(c) => c, + Err(e) => { + files.push(Err(e)); + continue; + } + }; + + if creation < after { + continue; + } + } + + if force_filehash { + files.push(purge_filehash(sha256_digest)); + + let mut prefix = user_id.server_name().as_bytes().to_vec(); + prefix.push(0xff); + 
prefix.extend_from_slice(media_id.as_bytes()); + prefix.push(0xff); + for (_, mut metadata) in self.thumbnailid_metadata.scan_prefix(prefix) { + metadata.truncate(32); + let sha256_digest = metadata; + files.push(purge_filehash(sha256_digest)); + } + } else { + match self.purge_mediaid(user_id.server_name(), &media_id, false) { + Ok(f) => { + files.append(&mut f.into_iter().map(Ok).collect()); + } + Err(e) => files.push(Err(e)), + } + } + } + + files + } + + fn purge_and_get_hashes_from_server( + &self, + server_name: &ServerName, + force_filehash: bool, + after: Option, + ) -> Vec> { + let mut prefix = server_name.as_bytes().to_vec(); + prefix.push(0xff); + + let mut files = Vec::new(); + + // Purges all references to the given media in the database, + // returning a Vec of hex sha256 digests + let purge_sha256 = |files: &mut Vec>, mut metadata: Vec| { + metadata.truncate(32); + let sha256_digest = metadata; + + if let Some(after) = after { + let Some(metadata) = self + .filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + else { + // If the media has already been deleted, we shouldn't treat that as an error + return Ok(()); + }; + + if metadata.creation(&sha256_digest)? < after { + return Ok(()); + } + } + + let sha256_hex = hex::encode(&sha256_digest); + + self.purge_filehash(sha256_digest, false)?; + + files.push(Ok(sha256_hex)); + Ok(()) + }; + + let purge_mediaid = |files: &mut Vec>, key: Vec| { + let mut parts = key.split(|&b| b == 0xff); + + let server_name = parts + .next() + .ok_or_else(|| Error::bad_database("Invalid format of metadata key")) + .map(utils::string_from_bytes)? + .map_err(|_| Error::bad_database("Invalid ServerName String in metadata key")) + .map(OwnedServerName::try_from)? + .map_err(|_| Error::bad_database("Invalid ServerName String in metadata key"))?; + + let media_id = parts + .next() + .ok_or_else(|| Error::bad_database("Invalid format of metadata key")) + .map(utils::string_from_bytes)? + .map_err(|_| Error::bad_database("Invalid Media ID String in metadata key"))?; + + files.append( + &mut self + .purge_mediaid(&server_name, &media_id, false)? + .into_iter() + .map(Ok) + .collect(), + ); + + Ok(()) + }; + + for (key, value) in self + .servernamemediaid_metadata + .scan_prefix(prefix.clone()) + .chain(self.thumbnailid_metadata.scan_prefix(prefix.clone())) + { + if let Err(e) = if force_filehash { + purge_sha256(&mut files, value) + } else { + purge_mediaid(&mut files, key) + } { + files.push(Err(e)); + } + } + + files + } +} + +impl KeyValueDatabase { + fn purge_mediaid( + &self, + server_name: &ServerName, + media_id: &str, + only_filehash_metadata: bool, + ) -> Result> { + let mut files = Vec::new(); + + let count_required_to_purge = if only_filehash_metadata { 1 } else { 0 }; + + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + if let Some(sha256_digest) = self.servernamemediaid_metadata.get(&key)?.map(|mut value| { + value.truncate(32); + value + }) { + if !only_filehash_metadata { + if let Some(localpart) = self.servernamemediaid_userlocalpart.get(&key)? 
{ + self.servernamemediaid_userlocalpart.remove(&key)?; + + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(&localpart); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + self.servername_userlocalpart_mediaid.remove(&key)?; + }; + + self.servernamemediaid_metadata.remove(&key)?; + + let mut key = sha256_digest.clone(); + key.extend_from_slice(server_name.as_bytes()); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + self.filehash_servername_mediaid.remove(&key)?; + } + + if self + .filehash_servername_mediaid + .scan_prefix(sha256_digest.clone()) + .count() + <= count_required_to_purge + && self + .filehash_thumbnailid + .scan_prefix(sha256_digest.clone()) + .next() + .is_none() + { + self.filehash_metadata.remove(&sha256_digest)?; + files.push(hex::encode(sha256_digest)); + } + } + + key.push(0xff); + + let mut thumbnails = BTreeMap::new(); + + for (thumbnail_id, mut value) in self.thumbnailid_metadata.scan_prefix(key) { + value.truncate(32); + let sha256_digest = value; + + let entry = thumbnails + .entry(sha256_digest.clone()) + .and_modify(|v| *v += 1) + .or_insert(1); + + if !only_filehash_metadata { + self.filehash_thumbnailid.remove(&sha256_digest)?; + self.thumbnailid_metadata.remove(&thumbnail_id)?; + } + + // Basically, if this is the only media pointing to the filehash, get rid of it. + // It's a little complicated due to how blocking works. + if self + .filehash_servername_mediaid + .scan_prefix(sha256_digest.clone()) + .count() + <= count_required_to_purge + && self + .filehash_thumbnailid + .scan_prefix(sha256_digest.clone()) + .count() + <= if only_filehash_metadata { *entry } else { 0 } + { + self.filehash_metadata.remove(&sha256_digest)?; + files.push(hex::encode(sha256_digest)); + } + } + + Ok(files) + } + + fn purge_filehash(&self, sha256_digest: Vec, only_filehash_metadata: bool) -> Result<()> { + let handle_error = || { + error!( + "Invalid format of key in filehash_servername_mediaid for media with sha256 content hash of {}", + hex::encode(&sha256_digest) + ); + Error::BadDatabase("Invalid format of key in filehash_servername_mediaid") + }; + + if !only_filehash_metadata { + for (key, _) in self.filehash_thumbnailid.scan_prefix(sha256_digest.clone()) { + self.filehash_thumbnailid.remove(&key)?; + let (_, key) = key.split_at(32); + self.thumbnailid_metadata.remove(key)?; + } + + for (k, _) in self + .filehash_servername_mediaid + .scan_prefix(sha256_digest.clone()) + { + let (_, servername_mediaid) = k.split_at_checked(32).ok_or_else(handle_error)?; + + self.servernamemediaid_metadata.remove(servername_mediaid)?; + self.filehash_servername_mediaid.remove(&k)?; + + if let Some(localpart) = self + .servernamemediaid_userlocalpart + .get(servername_mediaid)? 
+ { + self.servernamemediaid_userlocalpart + .remove(servername_mediaid)?; + + let mut parts = servername_mediaid.split(|b: &u8| *b == 0xff); + + let mut key = parts.next().ok_or_else(handle_error)?.to_vec(); + key.push(0xff); + key.extend_from_slice(&localpart); + key.push(0xff); + key.extend_from_slice(parts.next().ok_or_else(handle_error)?); + + self.servername_userlocalpart_mediaid.remove(&key)?; + }; + } + } + + self.filehash_metadata.remove(&sha256_digest) + } } fn parse_metadata(value: &[u8]) -> Result { @@ -213,7 +612,47 @@ impl FilehashMetadata { Self { value } } + pub fn from_vec(vec: Vec) -> Self { + Self { value: vec } + } + pub fn value(&self) -> &[u8] { &self.value } + + fn get_u64_val( + &self, + range: Range, + name: &str, + sha256_digest: &[u8], + invalid_error: &'static str, + ) -> Result { + self.value + .get(range) + .ok_or_else(|| { + error!( + "Invalid format of metadata for media with sha256 content hash of {}", + hex::encode(sha256_digest) + ); + Error::BadDatabase("Invalid format of metadata in filehash_metadata") + })? + .try_into() + .map(u64::from_be_bytes) + .map_err(|_| { + error!( + "Invalid {name} for media with sha256 content hash of {}", + hex::encode(sha256_digest) + ); + Error::BadDatabase(invalid_error) + }) + } + + pub fn creation(&self, sha256_digest: &[u8]) -> Result { + self.get_u64_val( + 8..16, + "creation time", + sha256_digest, + "Invalid creation time in filehash_metadata", + ) + } } diff --git a/src/service/admin/mod.rs b/src/service/admin/mod.rs index 690da984..0e3cef1e 100644 --- a/src/service/admin/mod.rs +++ b/src/service/admin/mod.rs @@ -1,6 +1,11 @@ -use std::{collections::BTreeMap, convert::TryFrom, sync::Arc, time::Instant}; +use std::{ + collections::BTreeMap, + convert::TryFrom, + sync::Arc, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, +}; -use clap::Parser; +use clap::{Args, Parser}; use regex::Regex; use ruma::{ api::appservice::Registration, @@ -19,8 +24,8 @@ use ruma::{ }, TimelineEventType, }, - EventId, MilliSecondsSinceUnixEpoch, OwnedRoomAliasId, OwnedRoomId, RoomAliasId, RoomId, - RoomVersionId, ServerName, UserId, + EventId, MilliSecondsSinceUnixEpoch, MxcUri, OwnedRoomAliasId, OwnedRoomId, OwnedServerName, + RoomAliasId, RoomId, RoomVersionId, ServerName, UserId, }; use serde_json::value::to_raw_value; use tokio::sync::{mpsc, Mutex, RwLock}; @@ -82,11 +87,13 @@ enum AdminCommand { /// Deactivate a user /// /// User will not be removed from all rooms by default. - /// Use --leave-rooms to force the user to leave all rooms + /// Use --leave-rooms to force the user to leave all rooms. DeactivateUser { #[arg(short, long)] leave_rooms: bool, user_id: Box, + #[command(flatten)] + purge_media: DeactivatePurgeMediaArgs, }, #[command(verbatim_doc_comment)] @@ -94,6 +101,8 @@ enum AdminCommand { /// /// Recommended to use in conjunction with list-local-users. /// + /// Use either --purge-all-media or --purge-media-from-last to either delete all media uploaded + /// by them (in the last {specified timeframe}, if any) /// Users will not be removed from joined rooms by default. /// Can be overridden with --leave-rooms flag. /// Removing a mass amount of users from a room may cause a significant amount of leave events. 
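The purge-related flags declared below (`--media-from-last`, `--from-last`) are parsed by `humantime::parse_duration`, and the handlers convert the resulting `Duration` with `unix_secs_from_duration`, whose definition is not included in this excerpt. A minimal sketch, assuming that helper simply subtracts the window from the current time to obtain a Unix-seconds cutoff:

```rust
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Hypothetical stand-in for `unix_secs_from_duration`: convert a retention
/// window into a cutoff in seconds since the Unix epoch. Media whose recorded
/// creation time is at or after the cutoff falls inside the purge window.
fn unix_secs_cutoff(window: Duration) -> Result<u64, Box<dyn std::error::Error>> {
    let cutoff = SystemTime::now()
        .checked_sub(window)
        .ok_or("window reaches back past the Unix epoch")?;
    Ok(cutoff.duration_since(UNIX_EPOCH)?.as_secs())
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // humantime accepts forms like "48h", "60min" or "10days".
    let window = humantime::parse_duration("48h")?;
    println!("cutoff = {}", unix_secs_cutoff(window)?);
    Ok(())
}
```

Under that assumption, `--media-from-last 48h` restricts a purge to media uploaded within the last two days.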
@@ -107,9 +116,68 @@ enum AdminCommand { #[arg(short, long)] /// Remove users from their joined rooms leave_rooms: bool, - #[arg(short, long)] + #[arg(short = 'F', long)] /// Also deactivate admin accounts force: bool, + #[command(flatten)] + purge_media: DeactivatePurgeMediaArgs, + }, + + /// Purge a list of media, formatted as MXC URIs + /// There should be one URI per line, all contained within a code-block + /// + /// Note: This will also delete media with the same sha256 hash, so + /// only use this when you are certain all the media is undesirable + PurgeMedia, + + /// Purges all media uploaded by the local users listed in a code-block. + /// + /// Note: This will also delete identical media uploaded by other users, so + /// only use this when all the media they uploaded is undesirable + PurgeMediaFromUsers { + #[arg( + long, short = 't', + value_parser = humantime::parse_duration + )] + /// Only purge media uploaded in the last {timeframe} + /// + /// Should be in the form specified by humantime::parse_duration + /// (e.g. 48h, 60min, 10days etc.) + // --help is unformatted + #[allow(rustdoc::bare_urls)] + /// https://docs.rs/humantime/2.2.0/humantime/fn.parse_duration.html + from_last: Option, + + #[arg(long, short)] + /// Also deletes other media with the same SHA256 hash, ensuring that the file is removed from + /// the media backend, so only use this when all the media they uploaded is undesirable + force_filehash: bool, + }, + + /// Purges all media from the specified server + /// + /// Note: This will also delete identical media uploaded by local users, so + /// only use this when all the media from that server is undesirable (or if + /// you know that no media on the remote server is also uploaded locally) + PurgeMediaFromServer { + server_id: Box, + #[arg( + long, short = 't', + value_parser = humantime::parse_duration + )] + /// Only purge media uploaded in the last {timeframe} + /// + /// Should be in the form specified by humantime::parse_duration + /// (e.g. 48h, 60min, 10days etc.) + // --help is unformatted + #[allow(rustdoc::bare_urls)] + /// https://docs.rs/humantime/2.2.0/humantime/fn.parse_duration.html + from_last: Option, + + #[arg(long, short)] + /// Also deletes other media with the same SHA256 hash, ensuring that the file is removed from + /// the media backend, so only use this when all the media they uploaded is undesirable + force_filehash: bool, }, /// Get the auth_chain of a PDU @@ -181,6 +249,37 @@ enum AdminCommand { HashAndSignEvent { room_version_id: RoomVersionId }, } +#[derive(Args, Debug)] +#[group(multiple = true, required = false)] +pub struct DeactivatePurgeMediaArgs { + #[arg(long, short = 'm')] + /// Purges all media uploaded by the user(s) after deactivating their account + purge_media: bool, + + #[arg( + long, short = 't', + value_parser = humantime::parse_duration, + requires = "purge_media" + )] + /// If the --purge-media is present, it only purges media uploaded in the last {time-period} + /// + /// Should be in the form specified by humantime::parse_duration + /// (e.g. 48h, 60min, 10days etc.) 
+ // --help is unformatted + #[allow(rustdoc::bare_urls)] + /// https://docs.rs/humantime/2.2.0/humantime/fn.parse_duration.html + /// + /// Note: This will also delete identical media uploaded by other users, so + /// only use this when all the media they uploaded in this timeframe is undesirable + media_from_last: Option, + + #[arg(long, short = 'f', requires = "purge_media")] + /// If the --purge-media is present, it will also delete identical media uploaded by other + /// users, ensuring that the file is removed from the media backend, so only use this when all + /// the media they uploaded is undesirable + force_filehash: bool, +} + #[derive(Debug)] pub enum AdminRoomEvent { ProcessMessage(String), @@ -690,6 +789,7 @@ impl Service { AdminCommand::DeactivateUser { leave_rooms, user_id, + purge_media, } => { let user_id = Arc::::from(user_id); if !services().users.exists(&user_id)? { @@ -711,78 +811,42 @@ impl Service { leave_all_rooms(&user_id).await?; } - RoomMessageEventContent::text_plain(format!( - "User {user_id} has been deactivated" + let failed_purged_media = if purge_media.purge_media { + let after = purge_media + .media_from_last + .map(unix_secs_from_duration) + .transpose()?; + + services() + .media + .purge_from_user(&user_id, purge_media.force_filehash, after) + .len() + } else { + 0 + }; + + if failed_purged_media == 0 { + RoomMessageEventContent::text_plain(format!( + "User {user_id} has been deactivated" + )) + } else { + RoomMessageEventContent ::text_plain(format!( + "User {user_id} has been deactivated, but {failed_purged_media} media failed to be purged, check the logs for more details" )) + } } } - AdminCommand::DeactivateAll { leave_rooms, force } => { + AdminCommand::DeactivateAll { + leave_rooms, + force, + purge_media, + } => { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" { - let users = body.clone().drain(1..body.len() - 1).collect::>(); - - let mut user_ids = Vec::new(); - let mut remote_ids = Vec::new(); - let mut non_existent_ids = Vec::new(); - let mut invalid_users = Vec::new(); - - for &user in &users { - match <&UserId>::try_from(user) { - Ok(user_id) => { - if user_id.server_name() != services().globals.server_name() { - remote_ids.push(user_id) - } else if !services().users.exists(user_id)? { - non_existent_ids.push(user_id) - } else { - user_ids.push(user_id) - } - } - Err(_) => { - invalid_users.push(user); - } - } - } - - let mut markdown_message = String::new(); - let mut html_message = String::new(); - if !invalid_users.is_empty() { - markdown_message.push_str("The following user ids are not valid:\n```\n"); - html_message.push_str("The following user ids are not valid:\n
\n");
-                        for invalid_user in invalid_users {
-                            markdown_message.push_str(&format!("{invalid_user}\n"));
-                            html_message.push_str(&format!("{invalid_user}\n"));
-                        }
-                        markdown_message.push_str("```\n\n");
-                        html_message.push_str("</pre>
\n\n"); - } - if !remote_ids.is_empty() { - markdown_message - .push_str("The following users are not from this server:\n```\n"); - html_message - .push_str("The following users are not from this server:\n
\n");
-                        for remote_id in remote_ids {
-                            markdown_message.push_str(&format!("{remote_id}\n"));
-                            html_message.push_str(&format!("{remote_id}\n"));
-                        }
-                        markdown_message.push_str("```\n\n");
-                        html_message.push_str("</pre>
\n\n"); - } - if !non_existent_ids.is_empty() { - markdown_message.push_str("The following users do not exist:\n```\n"); - html_message.push_str("The following users do not exist:\n
\n");
-                        for non_existent_id in non_existent_ids {
-                            markdown_message.push_str(&format!("{non_existent_id}\n"));
-                            html_message.push_str(&format!("{non_existent_id}\n"));
-                        }
-                        markdown_message.push_str("```\n\n");
-                        html_message.push_str("</pre>
\n\n"); - } - if !markdown_message.is_empty() { - return Ok(RoomMessageEventContent::text_html( - markdown_message, - html_message, - )); - } + let mut user_ids = match userids_from_body(&body)? { + Ok(v) => v, + Err(message) => return Ok(message), + }; let mut deactivation_count = 0; let mut admins = Vec::new(); @@ -812,12 +876,81 @@ impl Service { } } - if admins.is_empty() { - RoomMessageEventContent::text_plain(format!( - "Deactivated {deactivation_count} accounts." + let mut failed_count = 0; + + if purge_media.purge_media { + let after = purge_media + .media_from_last + .map(unix_secs_from_duration) + .transpose()?; + + for user_id in user_ids { + failed_count += services() + .media + .purge_from_user(user_id, purge_media.force_filehash, after) + .len(); + } + } + + let mut message = format!("Deactivated {deactivation_count} accounts."); + if !admins.is_empty() { + message.push_str(&format!("\nSkipped admin accounts: {:?}. Use --force to deactivate admin accounts",admins.join(", "))); + } + if failed_count != 0 { + message.push_str(&format!( + "\nFailed to delete {failed_count} media, check logs for more details" )) + } + + RoomMessageEventContent::text_plain(message) + } else { + RoomMessageEventContent::text_plain( + "Expected code block in command body. Add --help for details.", + ) + } + } + AdminCommand::PurgeMedia => media_from_body(body).map_or_else( + |message| message, + |media| { + let failed_count = services().media.purge(&media, true).len(); + + if failed_count == 0 { + RoomMessageEventContent::text_plain("Successfully purged media") } else { - RoomMessageEventContent::text_plain(format!("Deactivated {} accounts.\nSkipped admin accounts: {:?}. Use --force to deactivate admin accounts", deactivation_count, admins.join(", "))) + RoomMessageEventContent::text_plain(format!( + "Failed to delete {failed_count} media, check logs for more details" + )) + } + }, + ), + AdminCommand::PurgeMediaFromUsers { + from_last, + force_filehash, + } => { + let after = from_last.map(unix_secs_from_duration).transpose()?; + + if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" + { + let user_ids = match userids_from_body(&body)? 
{ + Ok(v) => v, + Err(message) => return Ok(message), + }; + + let mut failed_count = 0; + + for user_id in user_ids { + failed_count += services() + .media + .purge_from_user(user_id, force_filehash, after) + .len(); + } + + if failed_count == 0 { + RoomMessageEventContent::text_plain("Successfully purged media") + } else { + RoomMessageEventContent::text_plain(format!( + "Failed to purge {failed_count} media, check logs for more details" + )) } } else { RoomMessageEventContent::text_plain( @@ -825,6 +958,34 @@ impl Service { ) } } + AdminCommand::PurgeMediaFromServer { + server_id: server_name, + from_last, + force_filehash, + } => { + if server_name == services().globals.server_name() { + return Err(Error::AdminCommand( + "Cannot purge all media from your own homeserver", + )); + } + + let after = from_last.map(unix_secs_from_duration).transpose()?; + + let failed_count = services() + .media + .purge_from_server(&server_name, force_filehash, after) + .len(); + + if failed_count == 0 { + RoomMessageEventContent::text_plain(format!( + "Media from {server_name} has successfully been purged" + )) + } else { + RoomMessageEventContent::text_plain(format!( + "Failed to purge {failed_count} media, check logs for more details" + )) + } + } AdminCommand::SignJson => { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" { @@ -1456,6 +1617,105 @@ impl Service { } } +fn userids_from_body<'a>( + body: &'a [&'a str], +) -> Result, RoomMessageEventContent>, Error> { + let users = body.to_owned().drain(1..body.len() - 1).collect::>(); + + let mut user_ids = Vec::new(); + let mut remote_ids = Vec::new(); + let mut non_existent_ids = Vec::new(); + let mut invalid_users = Vec::new(); + + for &user in &users { + match <&UserId>::try_from(user) { + Ok(user_id) => { + if user_id.server_name() != services().globals.server_name() { + remote_ids.push(user_id) + } else if !services().users.exists(user_id)? { + non_existent_ids.push(user_id) + } else { + user_ids.push(user_id) + } + } + Err(_) => { + invalid_users.push(user); + } + } + } + + let mut markdown_message = String::new(); + let mut html_message = String::new(); + if !invalid_users.is_empty() { + markdown_message.push_str("The following user ids are not valid:\n```\n"); + html_message.push_str("The following user ids are not valid:\n
\n");
+        for invalid_user in invalid_users {
+            markdown_message.push_str(&format!("{invalid_user}\n"));
+            html_message.push_str(&format!("{invalid_user}\n"));
+        }
+        markdown_message.push_str("```\n\n");
+        html_message.push_str("</pre>
\n\n"); + } + if !remote_ids.is_empty() { + markdown_message.push_str("The following users are not from this server:\n```\n"); + html_message.push_str("The following users are not from this server:\n
\n");
+        for remote_id in remote_ids {
+            markdown_message.push_str(&format!("{remote_id}\n"));
+            html_message.push_str(&format!("{remote_id}\n"));
+        }
+        markdown_message.push_str("```\n\n");
+        html_message.push_str("</pre>
\n\n"); + } + if !non_existent_ids.is_empty() { + markdown_message.push_str("The following users do not exist:\n```\n"); + html_message.push_str("The following users do not exist:\n
\n");
+        for non_existent_id in non_existent_ids {
+            markdown_message.push_str(&format!("{non_existent_id}\n"));
+            html_message.push_str(&format!("{non_existent_id}\n"));
+        }
+        markdown_message.push_str("```\n\n");
+        html_message.push_str("</pre>
\n\n"); + } + if !markdown_message.is_empty() { + return Ok(Err(RoomMessageEventContent::text_html( + markdown_message, + html_message, + ))); + } + + Ok(Ok(user_ids)) +} + +fn media_from_body( + body: Vec<&str>, +) -> Result, RoomMessageEventContent> { + if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" { + Ok(body + .clone() + .drain(1..body.len() - 1) + .map(>::from) + .filter_map(|mxc| { + mxc.parts() + .map(|(server_name, media_id)| (server_name.to_owned(), media_id.to_owned())) + .ok() + }) + .collect::>()) + } else { + Err(RoomMessageEventContent::text_plain( + "Expected code block in command body. Add --help for details.", + )) + } +} + +fn unix_secs_from_duration(duration: Duration) -> Result { + SystemTime::now() + .checked_sub(duration).ok_or_else(||Error::AdminCommand("Given timeframe cannot be represented as system time, please try again with a shorter time-frame")) + .map(|time| time + .duration_since(UNIX_EPOCH) + .expect("Time is after unix epoch") + .as_secs()) +} + #[cfg(test)] mod test { use super::*; diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 2301ebbc..97074d30 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -1,4 +1,4 @@ -use ruma::{ServerName, UserId}; +use ruma::{OwnedServerName, ServerName, UserId}; use sha2::{digest::Output, Sha256}; use crate::Result; @@ -42,4 +42,24 @@ pub trait Data: Send + Sync { width: u32, height: u32, ) -> Result; + + fn purge_and_get_hashes( + &self, + media: &[(OwnedServerName, String)], + force_filehash: bool, + ) -> Vec>; + + fn purge_and_get_hashes_from_user( + &self, + user_id: &UserId, + force_filehash: bool, + after: Option, + ) -> Vec>; + + fn purge_and_get_hashes_from_server( + &self, + server_name: &ServerName, + force_filehash: bool, + after: Option, + ) -> Vec>; } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index b325f507..8cf1d6b5 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -1,15 +1,19 @@ mod data; -use std::io::Cursor; +use std::{fs, io::Cursor}; pub use data::Data; use ruma::{ api::client::{error::ErrorKind, media::is_safe_inline_content_type}, http_headers::{ContentDisposition, ContentDispositionType}, - ServerName, UserId, + OwnedServerName, ServerName, UserId, }; use sha2::{digest::Output, Digest, Sha256}; +use tracing::error; -use crate::{config::MediaConfig, services, Error, Result}; +use crate::{ + config::{DirectoryStructure, MediaConfig}, + services, Error, Result, +}; use image::imageops::FilterType; pub struct DbFileMeta { @@ -293,6 +297,67 @@ impl Service { })) } } + + /// Purges all of the specified media. + /// + /// If `force_filehash` is true, all media and/or thumbnails which share sha256 content hashes + /// with the purged media will also be purged, meaning that the media is guaranteed to be deleted + /// from the media backend. Otherwise, it will be deleted if only the media IDs requested to be + /// purged have that sha256 hash. + /// + /// Returns errors for all the files that were failed to be deleted, if any. + pub fn purge(&self, media: &[(OwnedServerName, String)], force_filehash: bool) -> Vec { + let hashes = self.db.purge_and_get_hashes(media, force_filehash); + + purge_files(hashes) + } + + /// Purges all (past a certain time in unix seconds, if specified) media + /// sent by a user. 
+    ///
+    /// If `force_filehash` is true, all media and/or thumbnails which share sha256 content hashes
+    /// with the purged media will also be purged, meaning that the media is guaranteed to be deleted
+    /// from the media backend. Otherwise, it will only be deleted if the media IDs requested to be
+    /// purged are the only ones with that sha256 hash.
+    ///
+    /// Returns errors for all the files that failed to be deleted, if any.
+    ///
+    /// Note: this currently only works for local users, as we cannot determine who
+    /// exactly uploaded the file when it comes to remote users.
+    pub fn purge_from_user(
+        &self,
+        user_id: &UserId,
+        force_filehash: bool,
+        after: Option<u64>,
+    ) -> Vec<Error> {
+        let hashes = self
+            .db
+            .purge_and_get_hashes_from_user(user_id, force_filehash, after);
+
+        purge_files(hashes)
+    }
+
+    /// Purges all (past a certain time in unix seconds, if specified) media
+    /// obtained from the specified server (due to the MXC URI).
+    ///
+    /// If `force_filehash` is true, all media and/or thumbnails which share sha256 content hashes
+    /// with the purged media will also be purged, meaning that the media is guaranteed to be deleted
+    /// from the media backend. Otherwise, it will only be deleted if the media IDs requested to be
+    /// purged are the only ones with that sha256 hash.
+    ///
+    /// Returns errors for all the files that failed to be deleted, if any.
+    pub fn purge_from_server(
+        &self,
+        server_name: &ServerName,
+        force_filehash: bool,
+        after: Option<u64>,
+    ) -> Vec<Error> {
+        let hashes = self
+            .db
+            .purge_and_get_hashes_from_server(server_name, force_filehash, after);
+
+        purge_files(hashes)
+    }
 }
 
 /// Creates the media file, using the configured media backend
@@ -335,6 +400,68 @@ async fn get_file(sha256_hex: &str) -> Result<Vec<u8>> {
     })
 }
 
+/// Purges the given files from the media backend
+/// Returns a `Vec` of errors that occurred when attempting to delete the files
+///
+/// Note: this does NOT remove the related metadata from the database
+fn purge_files(hashes: Vec<Result<String>>) -> Vec<Error> {
+    hashes
+        .into_iter()
+        .map(|hash| match hash {
+            Ok(v) => delete_file(&v),
+            Err(e) => Err(e),
+        })
+        .filter_map(|r| if let Err(e) = r { Some(e) } else { None })
+        .collect()
+}
+
+/// Deletes the given file from the media backend
+///
+/// Note: this does NOT remove the related metadata from the database
+fn delete_file(sha256_hex: &str) -> Result<()> {
+    match &services().globals.config.media {
+        MediaConfig::FileSystem {
+            path,
+            directory_structure,
+        } => {
+            let mut path =
+                services()
+                    .globals
+                    .get_media_path(path, directory_structure, sha256_hex)?;
+
+            if let Err(e) = fs::remove_file(&path) {
+                // Multiple files with the same filehash might be requested to be deleted
+                if e.kind() != std::io::ErrorKind::NotFound {
+                    error!("Error removing media from filesystem: {e}");
+                    Err(e)?;
+                }
+            }
+
+            if let DirectoryStructure::Deep { length: _, depth } = directory_structure {
+                let mut depth = depth.get();
+
+                while depth > 0 {
+                    // Popped at the start of the loop so that, on the first iteration,
+                    // the file name itself is removed from the path
+                    path.pop();
+
+                    if let Err(e) = fs::remove_dir(&path) {
+                        if e.kind() == std::io::ErrorKind::DirectoryNotEmpty {
+                            break;
+                        } else {
+                            error!("Error removing empty media directories: {e}");
+                            Err(e)?;
+                        }
+                    }
+
+                    depth -= 1;
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
 /// Creates a content disposition with the given `filename`, using the `content_type` to determine whether
 /// the disposition should be `inline` or `attachment`
 fn content_disposition(

From 594fe5f98f2941221a1ef6f447c36b89c206c4a8 Mon Sep 17 00:00:00 2001
From: 
Matthias Ahouansou Date: Mon, 31 Mar 2025 00:39:19 +0100 Subject: [PATCH 10/15] feat(media): blocking --- Cargo.lock | 69 ++++++ Cargo.toml | 1 + src/api/client_server/media.rs | 6 + src/api/server_server.rs | 8 + src/database/key_value/media.rs | 408 ++++++++++++++++++++++++++++++-- src/database/mod.rs | 2 + src/service/admin/mod.rs | 168 ++++++++++++- src/service/media/data.rs | 33 ++- src/service/media/mod.rs | 62 ++++- 9 files changed, 738 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6764c0c5..c5d5695b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,6 +38,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstyle" version = "1.0.7" @@ -430,6 +445,20 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "chrono" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -495,6 +524,7 @@ dependencies = [ "axum-server", "base64 0.22.1", "bytes", + "chrono", "clap", "directories", "figment", @@ -1296,6 +1326,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2fd658b06e56721792c5df4475705b6cda790e9298d19d2f8af083457bcd127" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "idna" version = "0.4.0" @@ -3558,6 +3612,21 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 7ee97b4f..c70055f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -123,6 +123,7 @@ thread_local = "1.1.7" hmac = "0.12.1" sha-1 = "0.10.1" # used for conduit's CLI and admin room command parsing +chrono = "0.4" clap = { version = "4.3.0", default-features = false, features = [ "derive", "error-context", diff --git 
a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 93975475..d4c8738d 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -199,6 +199,8 @@ async fn get_content( allow_remote: bool, authenticated: bool, ) -> Result { + services().media.check_blocked(server_name, &media_id)?; + if let Ok(Some(FileMeta { content_disposition, content_type, @@ -278,6 +280,8 @@ async fn get_content_as_filename( allow_remote: bool, authenticated: bool, ) -> Result { + services().media.check_blocked(server_name, &media_id)?; + if let Ok(Some(FileMeta { file, content_type, .. })) = services() @@ -371,6 +375,8 @@ async fn get_content_thumbnail( allow_remote: bool, authenticated: bool, ) -> Result { + services().media.check_blocked(server_name, &media_id)?; + if let Some(FileMeta { file, content_type, diff --git a/src/api/server_server.rs b/src/api/server_server.rs index 3f780ebd..5cd46e26 100644 --- a/src/api/server_server.rs +++ b/src/api/server_server.rs @@ -2221,6 +2221,10 @@ pub async fn create_invite_route( pub async fn get_content_route( body: Ruma, ) -> Result { + services() + .media + .check_blocked(services().globals.server_name(), &body.media_id)?; + if let Some(FileMeta { content_disposition, content_type, @@ -2249,6 +2253,10 @@ pub async fn get_content_route( pub async fn get_content_thumbnail_route( body: Ruma, ) -> Result { + services() + .media + .check_blocked(services().globals.server_name(), &body.media_id)?; + let Some(FileMeta { file, content_type, diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 6f835b5b..f1a3f6e8 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, ops::Range}; +use std::{collections::BTreeMap, ops::Range, slice::Split}; use ruma::{api::client::error::ErrorKind, OwnedServerName, ServerName, UserId}; use sha2::{digest::Output, Sha256}; @@ -6,7 +6,10 @@ use tracing::error; use crate::{ database::KeyValueDatabase, - service::{self, media::DbFileMeta}, + service::{ + self, + media::{BlockedMediaInfo, DbFileMeta}, + }, utils, Error, Result, }; @@ -20,11 +23,14 @@ impl service::media::Data for KeyValueDatabase { filename: Option<&str>, content_type: Option<&str>, user_id: Option<&UserId>, + is_blocked_filehash: bool, ) -> Result<()> { - let metadata = FilehashMetadata::new(file_size); + if !is_blocked_filehash { + let metadata = FilehashMetadata::new(file_size); - self.filehash_metadata - .insert(&sha256_digest, metadata.value())?; + self.filehash_metadata + .insert(&sha256_digest, metadata.value())?; + }; let mut key = sha256_digest.to_vec(); key.extend_from_slice(servername.as_bytes()); @@ -167,9 +173,12 @@ impl service::media::Data for KeyValueDatabase { value.truncate(32); let sha256_digest = value; + let is_blocked = self.is_blocked_filehash(&sha256_digest)?; let sha256_hex = hex::encode(&sha256_digest); - self.purge_filehash(sha256_digest, false)?; + // If the file is blocked, we want to keep the metadata about it so it can be viewed, + // as well as filehashes blocked + self.purge_filehash(sha256_digest, is_blocked)?; Ok(sha256_hex) }; @@ -195,11 +204,14 @@ impl service::media::Data for KeyValueDatabase { files.push(purge(value)); } } else { - match self.purge_mediaid(server_name, media_id, false) { - Ok(f) => { + match self + .is_blocked(server_name, media_id) + .map(|is_blocked| self.purge_mediaid(server_name, media_id, is_blocked)) + { + Ok(Ok(f)) => { files.append(&mut f.into_iter().map(Ok).collect()); } - 
Err(e) => files.push(Err(e)), + Ok(Err(e)) | Err(e) => files.push(Err(e)), } } } @@ -221,8 +233,11 @@ impl service::media::Data for KeyValueDatabase { let purge_filehash = |sha256_digest: Vec| { let sha256_hex = hex::encode(&sha256_digest); + let is_blocked = self.is_blocked_filehash(&sha256_digest)?; - self.purge_filehash(sha256_digest, false)?; + // If the file is blocked, we want to keep the metadata about it so it can be viewed, + // as well as filehashes blocked + self.purge_filehash(sha256_digest, is_blocked)?; Ok(sha256_hex) }; @@ -310,11 +325,15 @@ impl service::media::Data for KeyValueDatabase { files.push(purge_filehash(sha256_digest)); } } else { - match self.purge_mediaid(user_id.server_name(), &media_id, false) { - Ok(f) => { + match self + .is_blocked(user_id.server_name(), &media_id) + .map(|is_blocked| { + self.purge_mediaid(user_id.server_name(), &media_id, is_blocked) + }) { + Ok(Ok(f)) => { files.append(&mut f.into_iter().map(Ok).collect()); } - Err(e) => files.push(Err(e)), + Ok(Err(e)) | Err(e) => files.push(Err(e)), } } } @@ -355,8 +374,11 @@ impl service::media::Data for KeyValueDatabase { } let sha256_hex = hex::encode(&sha256_digest); + let is_blocked = self.is_blocked_filehash(&sha256_digest)?; - self.purge_filehash(sha256_digest, false)?; + // If the file is blocked, we want to keep the metadata about it so it can be viewed, + // as well as filehashes blocked + self.purge_filehash(sha256_digest, is_blocked)?; files.push(Ok(sha256_hex)); Ok(()) @@ -379,9 +401,11 @@ impl service::media::Data for KeyValueDatabase { .map(utils::string_from_bytes)? .map_err(|_| Error::bad_database("Invalid Media ID String in metadata key"))?; + let is_blocked = self.is_blocked(&server_name, &media_id)?; + files.append( &mut self - .purge_mediaid(&server_name, &media_id, false)? + .purge_mediaid(&server_name, &media_id, is_blocked)? .into_iter() .map(Ok) .collect(), @@ -406,9 +430,363 @@ impl service::media::Data for KeyValueDatabase { files } + + fn is_blocked(&self, server_name: &ServerName, media_id: &str) -> Result { + let blocked_via_hash = || { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + let Some(metadata) = self.servernamemediaid_metadata.get(&key)? else { + return Ok(false); + }; + + let sha256_digest = parse_metadata(&metadata).inspect_err(|e| { + error!("Error parsing metadata for \"mxc://{server_name}/{media_id}\" from servernamemediaid_metadata: {e}"); + })?.sha256_digest; + + self.is_blocked_filehash(&sha256_digest) + }; + + Ok(self.is_directly_blocked(server_name, media_id)? || blocked_via_hash()?) 
+ } + + fn block( + &self, + media: &[(OwnedServerName, String)], + unix_secs: u64, + reason: Option, + ) -> Vec { + let reason = reason.unwrap_or_default(); + let unix_secs = unix_secs.to_be_bytes(); + + let mut errors = Vec::new(); + + for (server_name, media_id) in media { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + let mut value = unix_secs.to_vec(); + value.extend_from_slice(reason.as_bytes()); + + if let Err(e) = self.blocked_servername_mediaid.insert(&key, &value) { + errors.push(e); + } + } + + errors + } + + fn block_from_user( + &self, + user_id: &UserId, + now: u64, + reason: &str, + after: Option, + ) -> Vec { + let mut prefix = user_id.server_name().as_bytes().to_vec(); + prefix.push(0xff); + prefix.extend_from_slice(user_id.localpart().as_bytes()); + prefix.push(0xff); + + let mut value = now.to_be_bytes().to_vec(); + value.extend_from_slice(reason.as_bytes()); + + self.servername_userlocalpart_mediaid + .scan_prefix(prefix) + .map(|(k, _)| { + let parts = k.split(|&b| b == 0xff); + + let media_id = parts.last().ok_or_else(|| { + Error::bad_database("Invalid format of key in blocked_servername_mediaid") + })?; + + let mut key = user_id.server_name().as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id); + + let Some(mut meta) = self.servernamemediaid_metadata.get(&key)? else { + return Err(Error::bad_database( + "Invalid format of metadata in servernamemediaid_metadata", + )); + }; + meta.truncate(32); + let sha256_digest = meta; + + let Some(metadata) = self + .filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + else { + return Ok(()); + }; + + if after + .map(|after| Ok::(metadata.creation(&sha256_digest)? > after)) + .transpose()? + .unwrap_or(true) + { + self.blocked_servername_mediaid.insert(&key, &value) + } else { + Ok(()) + } + }) + .filter_map(Result::err) + .collect() + } + + fn unblock(&self, media: &[(OwnedServerName, String)]) -> Vec { + let maybe_remove_remaining_metadata = |metadata: &DbFileMeta, errors: &mut Vec| { + for (k, _) in self + .filehash_servername_mediaid + .scan_prefix(metadata.sha256_digest.clone()) + { + if let Some(servername_mediaid) = k.get(32..) 
{ + if let Err(e) = self.blocked_servername_mediaid.remove(servername_mediaid) { + errors.push(e); + } + } else { + error!( + "Invalid format of key in filehash_servername_mediaid for media with sha256 content hash of {}", + hex::encode(&metadata.sha256_digest) + ); + errors.push(Error::BadDatabase( + "Invalid format of key in filehash_servername_mediaid", + )); + } + } + + let thumbnail_id_error = || { + error!( + "Invalid format of key in filehash_thumbnail_id for media with sha256 content hash of {}", + hex::encode(&metadata.sha256_digest) + ); + Error::BadDatabase("Invalid format of value in filehash_thumbnailid") + }; + + for (k, _) in self + .filehash_thumbnailid + .scan_prefix(metadata.sha256_digest.clone()) + { + if let Some(end) = k.len().checked_sub(9) { + if let Some(servername_mediaid) = k.get(32..end) { + if let Err(e) = self.blocked_servername_mediaid.remove(servername_mediaid) { + errors.push(e); + } + } else { + errors.push(thumbnail_id_error()); + } + errors.push(thumbnail_id_error()); + }; + } + + // If we don't have the actual file downloaded anymore, remove the remaining + // metadata of the file + match self + .filehash_metadata + .get(&metadata.sha256_digest) + .map(|opt| opt.is_none()) + { + Err(e) => errors.push(e), + Ok(true) => { + if let Err(e) = self.purge_filehash(metadata.sha256_digest.clone(), false) { + errors.push(e); + } + } + Ok(false) => (), + } + }; + + let mut errors = Vec::new(); + + for (server_name, media_id) in media { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + match self + .servernamemediaid_metadata + .get(&key) + .map(|opt| opt.as_deref().map(parse_metadata)) + { + Err(e) => { + errors.push(e); + continue; + } + Ok(None) => (), + Ok(Some(Err(e))) => { + error!("Error parsing metadata for \"mxc://{server_name}/{media_id}\" from servernamemediaid_metadata: {e}"); + errors.push(e); + continue; + } + Ok(Some(Ok(metadata))) => { + maybe_remove_remaining_metadata(&metadata, &mut errors); + } + } + + key.push(0xff); + for (_, v) in self.thumbnailid_metadata.scan_prefix(key) { + match parse_metadata(&v) { + Ok(metadata) => { + maybe_remove_remaining_metadata(&metadata, &mut errors); + } + Err(e) => { + error!("Error parsing metadata for thumbnail of \"mxc://{server_name}/{media_id}\" from thumbnailid_metadata: {e}"); + errors.push(e); + } + } + } + } + + errors + } + + fn list_blocked(&self) -> Vec> { + let parse_servername = |parts: &mut Split<_, _>| { + OwnedServerName::try_from( + utils::string_from_bytes(parts.next().ok_or_else(|| { + Error::BadDatabase("Invalid format of metadata of blocked media") + })?) + .map_err(|_| Error::BadDatabase("Invalid server_name String of blocked data"))?, + ) + .map_err(|_| Error::BadDatabase("Invalid ServerName in blocked_servername_mediaid")) + }; + + let parse_string = + |parts: &mut Split<_, _>| { + utils::string_from_bytes(parts.next().ok_or_else(|| { + Error::BadDatabase("Invalid format of metadata of blocked media") + })?) 
+ .map_err(|_| Error::BadDatabase("Invalid string in blocked media metadata")) + }; + + let splitter = |b: &u8| *b == 0xff; + + self.blocked_servername_mediaid + .iter() + .map(|(k, v)| { + let mut parts = k.split(splitter); + + // Using map_err, as inspect_err causes lifetime issues + // "implementation of `FnOnce` is not general enough" + let log_error = |e| { + error!("Error parsing key of blocked media: {e}"); + e + }; + + let server_name = parse_servername(&mut parts).map_err(log_error)?; + + let media_id = parse_string(&mut parts).map_err(log_error)?; + + let (unix_secs, reason) = v + .split_at_checked(8) + .map(|(secs, reason)| -> Result<(u64, Option)> { + Ok(( + secs.try_into() + .map_err(|_| { + Error::bad_database( + "Invalid block time in blocked_servername_mediaid ", + ) + }) + .map(u64::from_be_bytes)?, + if reason.is_empty() { + None + } else { + Some(utils::string_from_bytes(reason).map_err(|_| { + Error::bad_database("Invalid string in blocked media metadata") + })?) + }, + )) + }) + .ok_or_else(|| { + Error::bad_database("Invalid format of value in blocked_servername_mediaid") + })??; + + let sha256_hex = self.servernamemediaid_metadata.get(&k)?.map(|mut meta| { + meta.truncate(32); + hex::encode(meta) + }); + + Ok(BlockedMediaInfo { + server_name, + media_id, + unix_secs, + reason, + sha256_hex, + }) + }) + .collect() + } + + fn is_blocked_filehash(&self, sha256_digest: &[u8]) -> Result { + for (filehash_servername_mediaid, _) in self + .filehash_servername_mediaid + .scan_prefix(sha256_digest.to_owned()) + { + let servername_mediaid = filehash_servername_mediaid.get(32..).ok_or_else(|| { + error!( + "Invalid format of key in filehash_servername_mediaid for media with sha256 content hash of {}", + hex::encode(sha256_digest) + ); + Error::BadDatabase("Invalid format of key in filehash_servername_mediaid") + })?; + + if self + .blocked_servername_mediaid + .get(servername_mediaid)? + .is_some() + { + return Ok(true); + } + } + + let thumbnail_id_error = || { + error!( + "Invalid format of key in filehash_thumbnail_id for media with sha256 content hash of {}", + hex::encode(sha256_digest) + ); + Error::BadDatabase("Invalid format of value in filehash_thumbnailid") + }; + + for (thumbnail_id, _) in self + .filehash_thumbnailid + .scan_prefix(sha256_digest.to_owned()) + { + let servername_mediaid = thumbnail_id + .get( + 32..thumbnail_id + .len() + .checked_sub(9) + .ok_or_else(thumbnail_id_error)?, + ) + .ok_or_else(thumbnail_id_error)?; + + if self + .blocked_servername_mediaid + .get(servername_mediaid)? 
+ .is_some() + { + return Ok(true); + } + } + + Ok(false) + } } impl KeyValueDatabase { + /// Only checks whether the media id itself is blocked, and not associated filehashes + fn is_directly_blocked(&self, server_name: &ServerName, media_id: &str) -> Result { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + self.blocked_servername_mediaid + .get(&key) + .map(|x| x.is_some()) + } + fn purge_mediaid( &self, server_name: &ServerName, diff --git a/src/database/mod.rs b/src/database/mod.rs index 925d636c..b564833b 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -179,6 +179,7 @@ pub struct KeyValueDatabase { pub(super) servernamemediaid_metadata: Arc, // Servername + MediaID -> content sha256 + Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints pub(super) filehash_servername_mediaid: Arc, // sha256 of content + Servername + MediaID, used to delete dangling references to filehashes from servernamemediaid pub(super) filehash_metadata: Arc, // sha256 of content -> file size + creation time + last access time + pub(super) blocked_servername_mediaid: Arc, // Servername + MediaID of blocked media -> time of block + reason pub(super) servername_userlocalpart_mediaid: Arc, // Servername + User Localpart + MediaID pub(super) servernamemediaid_userlocalpart: Arc, // Servername + MediaID -> User Localpart, used to remove keys from above when files are deleted by unrelated means pub(super) thumbnailid_metadata: Arc, // ThumbnailId = Servername + MediaID + width + height -> Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints @@ -389,6 +390,7 @@ impl KeyValueDatabase { servernamemediaid_metadata: builder.open_tree("servernamemediaid_metadata")?, filehash_servername_mediaid: builder.open_tree("filehash_servername_mediaid")?, filehash_metadata: builder.open_tree("filehash_metadata")?, + blocked_servername_mediaid: builder.open_tree("blocked_servername_mediaid")?, servername_userlocalpart_mediaid: builder .open_tree("servername_userlocalpart_mediaid")?, servernamemediaid_userlocalpart: builder diff --git a/src/service/admin/mod.rs b/src/service/admin/mod.rs index 0e3cef1e..2044c0ad 100644 --- a/src/service/admin/mod.rs +++ b/src/service/admin/mod.rs @@ -1,10 +1,12 @@ use std::{ + borrow::Cow, collections::BTreeMap, convert::TryFrom, sync::Arc, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; +use chrono::DateTime; use clap::{Args, Parser}; use regex::Regex; use ruma::{ @@ -37,7 +39,7 @@ use crate::{ Error, PduEvent, Result, }; -use super::pdu::PduBuilder; +use super::{media::BlockedMediaInfo, pdu::PduBuilder}; #[cfg_attr(test, derive(Debug))] #[derive(Parser)] @@ -180,6 +182,55 @@ enum AdminCommand { force_filehash: bool, }, + /// Prevents the list of media from being accessed, but does not delete the media if it + /// is already downloaded. If the media has already been downloaded, the sha256 hash + /// is blocked, meaning that any other current or future uploads/downloads of the exact same + /// content cannot be accessed either. 
+    ///
+    /// There should be one MXC URI per line, all contained within a code-block
+    BlockMedia {
+        #[arg(long, short)]
+        /// Prevents the specified media from being downloaded in the future
+        ///
+        /// Note: This will also delete identical media uploaded by other users, so
+        /// only use this when all the media is known to be undesirable
+        and_purge: bool,
+        #[arg(long, short)]
+        /// Optional reason as to why this media should be blocked
+        reason: Option<String>,
+    },
+
+    /// Prevents all media uploaded by the local users listed in a code-block from being accessed
+    ///
+    /// Note: This will also block media with the same SHA256 hash, so
+    /// only use this when all media uploaded by the user is undesirable (or if
+    /// you only plan for the blockage to be temporary)
+    BlockMediaFromUsers {
+        #[arg(
+            long, short,
+            value_parser = humantime::parse_duration
+        )]
+        /// Only block media uploaded in the last {timeframe}
+        ///
+        /// Should be in the form specified by humantime::parse_duration
+        /// (e.g. 48h, 60min, 10days etc.)
+        // --help is unformatted
+        #[allow(rustdoc::bare_urls)]
+        /// https://docs.rs/humantime/2.2.0/humantime/fn.parse_duration.html
+        from_last: Option<Duration>,
+        #[arg(long, short)]
+        /// Optional reason as to why this media should be blocked
+        reason: Option<String>,
+    },
+
+    /// Lists all media that is currently blocked
+    ListBlockedMedia,
+
+    /// Allows previously blocked media to be accessed again
+    ///
+    /// This will also unblock media with the same SHA256 hash.
+    /// There should be one MXC URI per line, all contained within a code-block
+    UnblockMedia,
+
     /// Get the auth_chain of a PDU
     GetAuthChain {
         /// An event ID (the $ character followed by the base64 reference hash)
@@ -986,6 +1037,121 @@ impl Service {
                     ))
                 }
             }
+            AdminCommand::BlockMedia { and_purge, reason } => media_from_body(body).map_or_else(
+                |message| message,
+                |media| {
+                    let failed_count = services().media.block(&media, reason).len();
+                    let failed_purge_count = if and_purge {
+                        services().media.purge(&media, true).len()
+                    } else {
+                        0
+                    };
+
+                    match (failed_count == 0, failed_purge_count == 0) {
+                        (true, true) => RoomMessageEventContent::text_plain("Successfully blocked media"),
+                        (false, true) => RoomMessageEventContent::text_plain(format!(
+                            "Failed to block {failed_count} media, check logs for more details"
+                        )),
+                        (true, false) => RoomMessageEventContent::text_plain(format!(
+                            "Failed to purge {failed_purge_count} media, check logs for more details"
+                        )),
+                        (false, false) => RoomMessageEventContent::text_plain(format!(
+                            "Failed to block {failed_count} and purge {failed_purge_count} media, check logs for more details"
+                        ))
+                    }
+                },
+            ),
+            AdminCommand::BlockMediaFromUsers { from_last, reason } => {
+                let after = from_last.map(unix_secs_from_duration).transpose()?;
+
+                if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```"
+                {
+                    let user_ids = match userids_from_body(&body)?
{ + Ok(v) => v, + Err(message) => return Ok(message), + }; + + let mut failed_count = 0; + + for user_id in user_ids { + let reason = reason.as_ref().map_or_else( + || Cow::Owned(format!("uploaded by {user_id}")), + Cow::Borrowed, + ); + + failed_count += services() + .media + .block_from_user(user_id, &reason, after) + .len(); + } + + if failed_count == 0 { + RoomMessageEventContent::text_plain("Successfully blocked media") + } else { + RoomMessageEventContent::text_plain(format!( + "Failed to block {failed_count} media, check logs for more details" + )) + } + } else { + RoomMessageEventContent::text_plain( + "Expected code block in command body. Add --help for details.", + ) + } + } + AdminCommand::ListBlockedMedia => { + let mut markdown_message = String::from( + "| SHA256 hash | MXC URI | Time Blocked | Reason |\n| --- | --- | --- | --- |", + ); + let mut html_message = String::from( + r#""#, + ); + + for media in services().media.list_blocked() { + let Ok(BlockedMediaInfo { + server_name, + media_id, + unix_secs, + reason, + sha256_hex, + }) = media else { + continue; + }; + + let sha256_hex = sha256_hex.unwrap_or_default(); + let reason = reason.unwrap_or_default(); + + let time = i64::try_from(unix_secs) + .map(|unix_secs| DateTime::from_timestamp(unix_secs, 0)) + .ok() + .flatten() + .expect("Time is valid"); + + markdown_message + .push_str(&format!("\n| {sha256_hex} | mxc://{server_name}/{media_id} | {time} | {reason} |")); + + html_message.push_str(&format!( + "", + )) + } + + html_message.push_str("
</tbody></table>
"); + + RoomMessageEventContent::text_html(markdown_message, html_message) + } + AdminCommand::UnblockMedia => media_from_body(body).map_or_else( + |message| message, + |media| { + let failed_count = services().media.unblock(&media).len(); + + if failed_count == 0 { + RoomMessageEventContent::text_plain("Successfully unblocked media") + } else { + RoomMessageEventContent::text_plain(format!( + "Failed to unblock {failed_count} media, check logs for more details" + )) + } + }, + ), AdminCommand::SignJson => { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" { diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 97074d30..f6da1788 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -1,7 +1,9 @@ use ruma::{OwnedServerName, ServerName, UserId}; use sha2::{digest::Output, Sha256}; -use crate::Result; +use crate::{Error, Result}; + +use super::BlockedMediaInfo; use super::DbFileMeta; @@ -16,6 +18,7 @@ pub trait Data: Send + Sync { filename: Option<&str>, content_type: Option<&str>, user_id: Option<&UserId>, + is_blocked_filehash: bool, ) -> Result<()>; fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result; @@ -62,4 +65,32 @@ pub trait Data: Send + Sync { force_filehash: bool, after: Option, ) -> Vec>; + + fn is_blocked(&self, server_name: &ServerName, media_id: &str) -> Result; + + fn block( + &self, + media: &[(OwnedServerName, String)], + unix_secs: u64, + reason: Option, + ) -> Vec; + + fn block_from_user( + &self, + user_id: &UserId, + now: u64, + reason: &str, + after: Option, + ) -> Vec; + + fn unblock(&self, media: &[(OwnedServerName, String)]) -> Vec; + + /// Returns a Vec of: + /// - The server the media is from + /// - The media id + /// - The time it was blocked, in unix seconds + /// - The optional reason why it was blocked + fn list_blocked(&self) -> Vec>; + + fn is_blocked_filehash(&self, sha256_digest: &[u8]) -> Result; } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 8cf1d6b5..d9ae2b22 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -12,7 +12,7 @@ use tracing::error; use crate::{ config::{DirectoryStructure, MediaConfig}, - services, Error, Result, + services, utils, Error, Result, }; use image::imageops::FilterType; @@ -38,6 +38,14 @@ pub struct Service { pub db: &'static dyn Data, } +pub struct BlockedMediaInfo { + pub server_name: OwnedServerName, + pub media_id: String, + pub unix_secs: u64, + pub reason: Option, + pub sha256_hex: Option, +} + impl Service { /// Uploads a file. pub async fn create( @@ -59,9 +67,16 @@ impl Service { filename, content_type, user_id, + self.db.is_blocked_filehash(&sha256_digest)?, )?; - create_file(&sha256_hex, file).await + if !self.db.is_blocked_filehash(&sha256_digest)? { + create_file(&sha256_hex, file).await + } else if user_id.is_none() { + Err(Error::BadRequest(ErrorKind::NotFound, "Media not found.")) + } else { + Ok(()) + } } /// Uploads or replaces a file thumbnail. @@ -358,6 +373,49 @@ impl Service { purge_files(hashes) } + + /// Checks whether the media has been blocked by administrators, returning either + /// a database error, or a not found error if it is blocked + pub fn check_blocked(&self, server_name: &ServerName, media_id: &str) -> Result<()> { + if self.db.is_blocked(server_name, media_id)? 
{ + Err(Error::BadRequest(ErrorKind::NotFound, "Media not found.")) + } else { + Ok(()) + } + } + + /// Marks the specified media as blocked, preventing them from being accessed + pub fn block(&self, media: &[(OwnedServerName, String)], reason: Option) -> Vec { + let now = utils::secs_since_unix_epoch(); + + self.db.block(media, now, reason) + } + + /// Marks the media uploaded by a local user as blocked, preventing it from being accessed + pub fn block_from_user( + &self, + user_id: &UserId, + reason: &str, + after: Option, + ) -> Vec { + let now = utils::secs_since_unix_epoch(); + + self.db.block_from_user(user_id, now, reason, after) + } + + /// Unblocks the specified media, allowing them from being accessed again + pub fn unblock(&self, media: &[(OwnedServerName, String)]) -> Vec { + self.db.unblock(media) + } + + /// Returns a Vec of: + /// - The server the media is from + /// - The media id + /// - The time it was blocked, in unix seconds + /// - The optional reason why it was blocked + pub fn list_blocked(&self) -> Vec> { + self.db.list_blocked() + } } /// Creates the media file, using the configured media backend From c3fb1b04569a5e538edd7e30d2a3b92bb8438b04 Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Wed, 16 Apr 2025 13:15:01 +0100 Subject: [PATCH 11/15] feat(media): retention policies --- Cargo.lock | 21 +++ Cargo.toml | 3 + docs/configuration.md | 55 +++++++ src/config/mod.rs | 196 +++++++++++++++++++--- src/database/key_value/media.rs | 277 +++++++++++++++++++++++++++++++- src/database/mod.rs | 2 + src/main.rs | 2 +- src/service/globals/mod.rs | 4 +- src/service/media/data.rs | 26 ++- src/service/media/mod.rs | 169 +++++++++++++++---- src/service/mod.rs | 4 +- 11 files changed, 698 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5d5695b..9bd25569 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,6 +402,15 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +[[package]] +name = "bytesize" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3c8f83209414aacf0eeae3cf730b18d6981697fba62f200fcfb92b9f082acba" +dependencies = [ + "serde", +] + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -524,6 +533,7 @@ dependencies = [ "axum-server", "base64 0.22.1", "bytes", + "bytesize", "chrono", "clap", "directories", @@ -534,6 +544,7 @@ dependencies = [ "hmac", "http 1.1.0", "humantime", + "humantime-serde", "hyper 1.3.1", "hyper-util", "image", @@ -1232,6 +1243,16 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +[[package]] +name = "humantime-serde" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a3db5ea5923d99402c94e9feb261dc5ee9b4efa158b0315f788cf549cc200c" +dependencies = [ + "humantime", + "serde", +] + [[package]] name = "hyper" version = "0.14.29" diff --git a/Cargo.toml b/Cargo.toml index c70055f1..511a6f4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,6 +88,9 @@ image = { version = "0.25", default-features = false, features = [ # Used for creating media filenames hex = "0.4" sha2 = "0.10" +# Used for parsing media retention policies from the config +bytesize = { version = "2", features = ["serde"] } +humantime-serde = "1" # Used to encode server public key base64 = "0.22" # Used when hashing the state diff --git 
a/docs/configuration.md b/docs/configuration.md
index 3323fb64..145c2c48 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -99,6 +99,61 @@ depth = 4
 length = 2
 ```
 
+#### Retention policies
+Over time, the amount of media will keep growing, even if each file was only accessed once.
+Retention policies allow media files to be deleted automatically if they meet certain criteria,
+allowing disk space to be saved.
+
+This can be configured via the `retention` field of the media config, which is an array of
+"scoped" policies with the following fields:
+- `scope`: specifies what type of media this policy applies to. If unset, all other scopes which
+  you have not configured will use this as a default. Possible values: `"local"`, `"remote"`,
+  `"thumbnail"`
+- `accessed`: the maximum amount of time since the media was last accessed,
+  in the form specified by [`humantime::parse_duration`](https://docs.rs/humantime/2.2.0/humantime/fn.parse_duration.html)
+  (e.g. `"240h"`, `"1400min"`, `"2months"`, etc.)
+- `created`: the maximum amount of time since the media was created, in the same format as
+  `accessed` above.
+- `space`: the maximum amount of space all of the media in this scope can occupy (if no scope is
+  specified, this becomes the total for **all** media). If creating or downloading new media
+  would cause this limit to be exceeded, the least recently accessed media will be deleted
+  repeatedly until there is enough space for the new media. The format is specified by
+  [`ByteSize`](https://docs.rs/bytesize/2.0.1/bytesize/index.html)
+  (e.g. `"10000MB"`, `"15GiB"`, `"1.5TB"`, etc.)
+
+Media needs to meet **all** of the specified requirements to be kept; otherwise, it will be deleted.
+This means that thumbnails have to meet both the `"thumbnail"` requirements and either the
+`"local"` or `"remote"` requirements in order to be kept.
+
+Media that does not meet the `accessed` or `created` requirements will be deleted during a
+periodic cleanup, which happens every 1/10th of the shortest retention time, clamped to run at
+most once every minute and at least once every 24 hours. For example, if the `accessed` time for
+all media is set to `"2months"` but overridden to `"48h"` for thumbnails, the cleanup will happen
+every 4.8 hours.
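For reference, the interval rule above can be sketched in a few lines of Rust. This is a reader's illustration rather than code from the patch; `cleanup_period` is a hypothetical helper name, and the snippet assumes the clamp behaviour exactly as described in the preceding paragraph:

```rust
use std::time::Duration;

// Hypothetical helper mirroring the documented rule: the cleanup period is
// 1/10th of the shortest configured retention duration, clamped so that
// cleanup runs at most once a minute and at least once every 24 hours.
fn cleanup_period(shortest_retention: Duration) -> Duration {
    shortest_retention
        .mul_f32(0.1)
        .clamp(Duration::from_secs(60), Duration::from_secs(60 * 60 * 24))
}

fn main() {
    // Thumbnails overridden to "48h" form the shortest policy:
    // 48h / 10 = 4.8h between cleanups, matching the example above.
    let period = cleanup_period(Duration::from_secs(48 * 60 * 60));
    println!("cleanup every {} seconds (~4.8h)", period.as_secs());
}
```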
+ +##### Example +```toml +# Total of 40GB for all media +[[global.media.retention]] # Notice the double "[]", due to this being a table item in an array +space = "40G" + +# Delete remote media not accessed for 30 days, or older than 90 days +[[global.media.retention]] +scope = "remote" +accessed = "30d" +created = "90days" # you can mix and match between the long and short format + +# Delete local media not accessed for 1 year +[[global.media.retention]] +scope = "local" +accessed = "1y" + +# Only store 1GB of thumbnails +[[global.media.retention]] +scope = "thumbnail" +space = "1GB" + +``` + ### TLS The `tls` table contains the following fields: - `certs`: The path to the public PEM certificate diff --git a/src/config/mod.rs b/src/config/mod.rs index 46dcf7e0..bfe4065e 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,13 +1,16 @@ use std::{ - collections::BTreeMap, + collections::{BTreeMap, HashMap, HashSet}, fmt, net::{IpAddr, Ipv4Addr}, num::NonZeroU8, path::PathBuf, + time::Duration, }; +use bytesize::ByteSize; use ruma::{OwnedServerName, RoomVersionId}; use serde::{de::IgnoredAny, Deserialize}; +use tokio::time::{interval, Interval}; use tracing::warn; use url::Url; @@ -221,23 +224,26 @@ impl From for Config { server: well_known_server, }; - let media = match media { - IncompleteMediaConfig::FileSystem { - path, - directory_structure, - } => MediaConfig::FileSystem { - path: path.unwrap_or_else(|| { - // We do this as we don't know if the path has a trailing slash, or even if the - // path separator is a forward or backward slash - [&database_path, "media"] - .iter() - .collect::() - .into_os_string() - .into_string() - .expect("Both inputs are valid UTF-8") - }), - directory_structure, + let media = MediaConfig { + backend: match media.backend { + IncompleteMediaBackendConfig::FileSystem { + path, + directory_structure, + } => MediaBackendConfig::FileSystem { + path: path.unwrap_or_else(|| { + // We do this as we don't know if the path has a trailing slash, or even if the + // path separator is a forward or backward slash + [&database_path, "media"] + .iter() + .collect::() + .into_os_string() + .into_string() + .expect("Both inputs are valid UTF-8") + }), + directory_structure, + }, }, + retention: media.retention.into(), }; Config { @@ -317,9 +323,159 @@ pub struct WellKnownConfig { pub server: OwnedServerName, } +#[derive(Deserialize, Default)] +pub struct IncompleteMediaConfig { + #[serde(flatten, default)] + pub backend: IncompleteMediaBackendConfig, + pub retention: IncompleteMediaRetentionConfig, +} + +#[derive(Clone, Debug)] +pub struct MediaConfig { + pub backend: MediaBackendConfig, + pub retention: MediaRetentionConfig, +} + +type IncompleteMediaRetentionConfig = Option>; + +#[derive(Clone, Debug)] +pub struct MediaRetentionConfig { + pub scoped: HashMap, + pub global_space: Option, +} + +impl MediaRetentionConfig { + /// Interval for the duration-based retention policies to be checked & enforced + pub fn cleanup_interval(&self) -> Option { + self.scoped + .values() + .filter_map(|scoped| match (scoped.created, scoped.accessed) { + (None, accessed) => accessed, + (created, None) => created, + (created, accessed) => created.min(accessed), + }) + .map(|dur| { + dur.mul_f32(0.1) + .max(Duration::from_secs(60).min(Duration::from_secs(60 * 60 * 24))) + }) + .min() + .map(interval) + } +} + +#[derive(Deserialize)] +pub struct IncompleteScopedMediaRetentionConfig { + pub scope: Option, + #[serde(default, with = "humantime_serde::option")] + pub accessed: Option, + 
#[serde(default, with = "humantime_serde::option")] + pub created: Option, + pub space: Option, +} + +impl From for MediaRetentionConfig { + fn from(value: IncompleteMediaRetentionConfig) -> Self { + { + let mut scoped = HashMap::from([ + ( + MediaRetentionScope::Remote, + ScopedMediaRetentionConfig::default(), + ), + ( + MediaRetentionScope::Thumbnail, + ScopedMediaRetentionConfig::default(), + ), + ]); + let mut fallback = None; + + if let Some(retention) = value { + for IncompleteScopedMediaRetentionConfig { + scope, + accessed, + space, + created, + } in retention + { + if let Some(scope) = scope { + scoped.insert( + scope, + ScopedMediaRetentionConfig { + accessed, + space, + created, + }, + ); + } else { + fallback = Some(ScopedMediaRetentionConfig { + accessed, + space, + created, + }) + } + } + } + + if let Some(fallback) = fallback.clone() { + for scope in [ + MediaRetentionScope::Remote, + MediaRetentionScope::Local, + MediaRetentionScope::Thumbnail, + ] { + scoped.entry(scope).or_insert_with(|| fallback.clone()); + } + } + + Self { + global_space: fallback.and_then(|global| global.space), + scoped, + } + } + } +} + +impl std::hash::Hash for IncompleteScopedMediaRetentionConfig { + fn hash(&self, state: &mut H) { + self.scope.hash(state); + } +} + +impl PartialEq for IncompleteScopedMediaRetentionConfig { + fn eq(&self, other: &Self) -> bool { + self.scope == other.scope + } +} + +impl Eq for IncompleteScopedMediaRetentionConfig {} + +#[derive(Debug, Clone)] +pub struct ScopedMediaRetentionConfig { + pub accessed: Option, + pub created: Option, + pub space: Option, +} + +impl Default for ScopedMediaRetentionConfig { + fn default() -> Self { + Self { + // 30 days + accessed: Some(Duration::from_secs(60 * 60 * 24 * 30)), + created: None, + space: None, + } + } +} + +#[derive(Deserialize, Clone, Debug, Hash, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum MediaRetentionScope { + Remote, + Local, + Thumbnail, +} + #[derive(Deserialize)] #[serde(tag = "backend", rename_all = "lowercase")] -pub enum IncompleteMediaConfig { +pub enum IncompleteMediaBackendConfig { FileSystem { path: Option, #[serde(default)] @@ -327,7 +483,7 @@ pub enum IncompleteMediaConfig { }, } -impl Default for IncompleteMediaConfig { +impl Default for IncompleteMediaBackendConfig { fn default() -> Self { Self::FileSystem { path: None, @@ -337,7 +493,7 @@ impl Default for IncompleteMediaConfig { } #[derive(Debug, Clone)] -pub enum MediaConfig { +pub enum MediaBackendConfig { FileSystem { path: String, directory_structure: DirectoryStructure, diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index f1a3f6e8..27a239fd 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,16 +1,18 @@ use std::{collections::BTreeMap, ops::Range, slice::Split}; +use bytesize::ByteSize; use ruma::{api::client::error::ErrorKind, OwnedServerName, ServerName, UserId}; use sha2::{digest::Output, Sha256}; use tracing::error; use crate::{ + config::{MediaRetentionConfig, MediaRetentionScope}, database::KeyValueDatabase, service::{ self, - media::{BlockedMediaInfo, DbFileMeta}, + media::{BlockedMediaInfo, Data as _, DbFileMeta, MediaType}, }, - utils, Error, Result, + services, utils, Error, Result, }; impl service::media::Data for KeyValueDatabase { @@ -773,6 +775,140 @@ impl service::media::Data for KeyValueDatabase { Ok(false) } + + fn files_to_delete( + &self, + sha256_digest: &[u8], + retention: &MediaRetentionConfig, + media_type: MediaType, + new_size: u64, + ) 
-> Result>> { + // If the file already exists, no space needs to be cleared + if self.filehash_metadata.get(sha256_digest)?.is_some() { + return Ok(Vec::new()); + } + + let scoped_space = |scope| retention.scoped.get(&scope).and_then(|policy| policy.space); + + let mut files_to_delete = Vec::new(); + + if media_type.is_thumb() { + if let Some(mut f) = self.purge_if_necessary( + scoped_space(MediaRetentionScope::Thumbnail), + |k| self.file_is_thumb(k), + &new_size, + ) { + files_to_delete.append(&mut f); + } + } + + match media_type { + MediaType::LocalMedia { thumbnail: _ } => { + if let Some(mut f) = self.purge_if_necessary( + scoped_space(MediaRetentionScope::Local), + |k| self.file_is_local(k).unwrap_or(true), + &new_size, + ) { + files_to_delete.append(&mut f); + } + } + MediaType::RemoteMedia { thumbnail: _ } => { + if let Some(mut f) = self.purge_if_necessary( + scoped_space(MediaRetentionScope::Remote), + |k| !self.file_is_local(k).unwrap_or(true), + &new_size, + ) { + files_to_delete.append(&mut f); + } + } + } + + if let Some(mut f) = self.purge_if_necessary(retention.global_space, |_| true, &new_size) { + files_to_delete.append(&mut f); + } + + Ok(files_to_delete) + } + + fn cleanup_time_retention(&self, retention: &MediaRetentionConfig) -> Vec> { + let now = utils::secs_since_unix_epoch(); + + let should_be_deleted = |k: &[u8], metadata: &FilehashMetadata| { + let check_policy = |retention_scope| { + if let Some(scoped_retention) = retention.scoped.get(&retention_scope) { + if let Some(created_policy) = scoped_retention.created { + if now - metadata.creation(k)? > created_policy.as_secs() { + return Ok(true); + } + } + + if let Some(accessed_policy) = scoped_retention.accessed { + if now - metadata.last_access(k)? > accessed_policy.as_secs() { + return Ok(true); + } + } + } + Ok(false) + }; + + if self.file_is_thumb(k) && check_policy(MediaRetentionScope::Thumbnail)? { + return Ok(true); + } + + if self.file_is_local(k)? { + check_policy(MediaRetentionScope::Local) + } else { + check_policy(MediaRetentionScope::Remote) + } + }; + + let mut files_to_delete = Vec::new(); + let mut errors_and_hashes = Vec::new(); + + for (k, v) in self.filehash_metadata.iter() { + match should_be_deleted(&k, &FilehashMetadata::from_vec(v)) { + Ok(true) => files_to_delete.push(k), + Ok(false) => (), + Err(e) => errors_and_hashes.push(Err(e)), + } + } + + errors_and_hashes.append(&mut self.purge(files_to_delete)); + + errors_and_hashes + } + + fn update_last_accessed(&self, server_name: &ServerName, media_id: &str) -> Result<()> { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + if let Some(mut meta) = self.servernamemediaid_metadata.get(&key)? { + meta.truncate(32); + let sha256_digest = meta; + + self.update_last_accessed_filehash(&sha256_digest) + } else { + // File was probably deleted just as we were fetching it, so nothing to do + Ok(()) + } + } + + fn update_last_accessed_filehash(&self, sha256_digest: &[u8]) -> Result<()> { + if let Some(mut metadata) = self + .filehash_metadata + .get(sha256_digest)? 
+ .map(FilehashMetadata::from_vec) + { + metadata.update_last_access(); + + self.filehash_metadata + .insert(sha256_digest, metadata.value()) + } else { + // File was probably deleted just as we were fetching it, so nothing to do + Ok(()) + } + } } impl KeyValueDatabase { @@ -930,6 +1066,119 @@ impl KeyValueDatabase { self.filehash_metadata.remove(&sha256_digest) } + + fn file_is_local(&self, k: &[u8]) -> Result { + for (k, _) in self.filehash_servername_mediaid.scan_prefix(k.to_vec()) { + let mut parts = k + .get(32..) + .map(|k| k.split(|&b| b == 0xff)) + .ok_or_else(|| { + Error::bad_database("Invalid format of key in filehash_servername_mediaid") + })?; + + let Some(server_name) = parts.next() else { + return Err(Error::bad_database( + "Invalid format of key in filehash_servername_mediaid", + )); + }; + + if utils::string_from_bytes(server_name).map_err(|_| { + Error::bad_database("Invalid UTF-8 servername in filehash_servername_mediaid") + })? == services().globals.server_name().as_str() + { + return Ok(true); + } + } + + Ok(false) + } + + fn file_is_thumb(&self, k: &[u8]) -> bool { + self.filehash_thumbnailid + .scan_prefix(k.to_vec()) + .next() + .is_some() + && self + .filehash_servername_mediaid + .scan_prefix(k.to_vec()) + .next() + .is_none() + } + + fn purge_if_necessary( + &self, + space: Option, + filter: impl Fn(&[u8]) -> bool, + new_size: &u64, + ) -> Option>> { + if let Some(space) = space { + let mut candidate_files_to_delete = Vec::new(); + let mut errors_and_hashes = Vec::new(); + let mut total_size = 0; + + let parse_value = |k: Vec, v: &FilehashMetadata| { + let last_access = v.last_access(&k)?; + let size = v.size(&k)?; + Ok((k, last_access, size)) + }; + + for (k, v) in self.filehash_metadata.iter().filter(|(k, _)| filter(k)) { + match parse_value(k, &FilehashMetadata::from_vec(v)) { + Ok(x) => { + total_size += x.2; + candidate_files_to_delete.push(x) + } + Err(e) => errors_and_hashes.push(Err(e)), + } + } + + if let Some(required_to_delete) = (total_size + *new_size).checked_sub(space.as_u64()) { + candidate_files_to_delete.sort_by_key(|(_, last_access, _)| *last_access); + candidate_files_to_delete.reverse(); + + let mut size_sum = 0; + let mut take = candidate_files_to_delete.len(); + + for (i, (_, _, file_size)) in candidate_files_to_delete.iter().enumerate() { + size_sum += file_size; + if size_sum >= required_to_delete { + take = i + 1; + break; + } + } + + errors_and_hashes.append( + &mut self.purge( + candidate_files_to_delete + .into_iter() + .take(take) + .map(|(hash, _, _)| hash) + .collect(), + ), + ); + + Some(errors_and_hashes) + } else { + None + } + } else { + None + } + } + + fn purge(&self, hashes: Vec>) -> Vec> { + hashes + .into_iter() + .map(|sha256_digest| { + let sha256_hex = hex::encode(&sha256_digest); + let is_blocked = self.is_blocked_filehash(&sha256_digest)?; + + self.purge_filehash(sha256_digest, is_blocked)?; + + Ok(sha256_hex) + }) + .collect() + } } fn parse_metadata(value: &[u8]) -> Result { @@ -994,6 +1243,12 @@ impl FilehashMetadata { Self { value: vec } } + pub fn update_last_access(&mut self) { + let now = utils::secs_since_unix_epoch().to_be_bytes(); + self.value.truncate(16); + self.value.extend_from_slice(&now); + } + pub fn value(&self) -> &[u8] { &self.value } @@ -1025,6 +1280,15 @@ impl FilehashMetadata { }) } + pub fn size(&self, sha256_digest: &[u8]) -> Result { + self.get_u64_val( + 0..8, + "file size", + sha256_digest, + "Invalid file size in filehash_metadata", + ) + } + pub fn creation(&self, sha256_digest: &[u8]) 
-> Result { self.get_u64_val( 8..16, @@ -1033,4 +1297,13 @@ impl FilehashMetadata { "Invalid creation time in filehash_metadata", ) } + + pub fn last_access(&self, sha256_digest: &[u8]) -> Result { + self.get_u64_val( + 16..24, + "last access time", + sha256_digest, + "Invalid last access time in filehash_metadata", + ) + } } diff --git a/src/database/mod.rs b/src/database/mod.rs index b564833b..e1389bc7 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1103,6 +1103,8 @@ impl KeyValueDatabase { services().sending.start_handler(); + services().media.start_time_retention_checker(); + Self::start_cleanup_task().await; if services().globals.allow_check_for_updates() { Self::start_check_for_updates_task(); diff --git a/src/main.rs b/src/main.rs index 01af9ad2..1a00e9ff 100644 --- a/src/main.rs +++ b/src/main.rs @@ -49,7 +49,7 @@ static SUB_TABLES: [&str; 3] = ["well_known", "tls", "media"]; // Not doing `pro // Yeah, I know it's terrible, but since it seems the container users dont want syntax like A[B][C]="...", // this is what we have to deal with. Also see: https://github.com/SergioBenitez/Figment/issues/12#issuecomment-801449465 -static SUB_SUB_TABLES: [&str; 1] = ["directory_structure"]; +static SUB_SUB_TABLES: [&str; 2] = ["directory_structure", "retention"]; #[tokio::main] async fn main() { diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index ac77afe9..069adef3 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -8,7 +8,7 @@ use ruma::{ use crate::api::server_server::DestinationResponse; use crate::{ - config::{DirectoryStructure, MediaConfig, TurnConfig}, + config::{DirectoryStructure, MediaBackendConfig, TurnConfig}, services, Config, Error, Result, }; use futures_util::FutureExt; @@ -230,7 +230,7 @@ impl Service { // Remove this exception once other media backends are added #[allow(irrefutable_let_patterns)] - if let MediaConfig::FileSystem { path, .. } = &s.config.media { + if let MediaBackendConfig::FileSystem { path, .. } = &s.config.media.backend { fs::create_dir_all(path)?; } diff --git a/src/service/media/data.rs b/src/service/media/data.rs index f6da1788..9f1d48c9 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -1,11 +1,9 @@ use ruma::{OwnedServerName, ServerName, UserId}; use sha2::{digest::Output, Sha256}; -use crate::{Error, Result}; +use crate::{config::MediaRetentionConfig, Error, Result}; -use super::BlockedMediaInfo; - -use super::DbFileMeta; +use super::{BlockedMediaInfo, DbFileMeta, MediaType}; pub trait Data: Send + Sync { #[allow(clippy::too_many_arguments)] @@ -93,4 +91,24 @@ pub trait Data: Send + Sync { fn list_blocked(&self) -> Vec>; fn is_blocked_filehash(&self, sha256_digest: &[u8]) -> Result; + + /// Gets the files that need to be deleted from the media backend in order to meet the `space` + /// requirements, as specified in the retention config. Calling this also causes those files' + /// metadata to be deleted from the database. + fn files_to_delete( + &self, + sha256_digest: &[u8], + retention: &MediaRetentionConfig, + media_type: MediaType, + new_size: u64, + ) -> Result>>; + + /// Gets the files that need to be deleted from the media backend in order to meet the + /// time-based requirements (`created` and `accessed`), as specified in the retention config. + /// Calling this also causes those files' metadata to be deleted from the database. 
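+    ///
+    /// A sketch of the intended call pattern (hypothetical caller code, not part
+    /// of this trait), matching how the service layer pairs this with `purge_files`:
+    /// ```ignore
+    /// // Hashes of files violating a `created`/`accessed` policy are returned (or
+    /// // per-file errors); their metadata is already gone, so the only remaining
+    /// // work is deleting the files themselves from the media backend.
+    /// let hashes = db.cleanup_time_retention(&retention);
+    /// purge_files(hashes);
+    /// ```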
+ fn cleanup_time_retention(&self, retention: &MediaRetentionConfig) -> Vec>; + + fn update_last_accessed(&self, server_name: &ServerName, media_id: &str) -> Result<()>; + + fn update_last_accessed_filehash(&self, sha256_digest: &[u8]) -> Result<()>; } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index d9ae2b22..a26e615d 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -1,5 +1,5 @@ mod data; -use std::{fs, io::Cursor}; +use std::{fs, io::Cursor, sync::Arc}; pub use data::Data; use ruma::{ @@ -8,10 +8,10 @@ use ruma::{ OwnedServerName, ServerName, UserId, }; use sha2::{digest::Output, Digest, Sha256}; -use tracing::error; +use tracing::{error, info}; use crate::{ - config::{DirectoryStructure, MediaConfig}, + config::{DirectoryStructure, MediaBackendConfig}, services, utils, Error, Result, }; use image::imageops::FilterType; @@ -34,6 +34,29 @@ pub struct FileMeta { pub file: Vec, } +pub enum MediaType { + LocalMedia { thumbnail: bool }, + RemoteMedia { thumbnail: bool }, +} + +impl MediaType { + pub fn new(server_name: &ServerName, thumbnail: bool) -> Self { + if server_name == services().globals.server_name() { + Self::LocalMedia { thumbnail } + } else { + Self::RemoteMedia { thumbnail } + } + } + + pub fn is_thumb(&self) -> bool { + match self { + MediaType::LocalMedia { thumbnail } | MediaType::RemoteMedia { thumbnail } => { + *thumbnail + } + } + } +} + pub struct Service { pub db: &'static dyn Data, } @@ -47,6 +70,34 @@ pub struct BlockedMediaInfo { } impl Service { + pub fn start_time_retention_checker(self: &Arc) { + let self2 = Arc::clone(self); + if let Some(cleanup_interval) = services().globals.config.media.retention.cleanup_interval() + { + tokio::spawn(async move { + let mut i = cleanup_interval; + loop { + i.tick().await; + let _ = self2.try_purge_time_retention().await; + } + }); + } + } + + async fn try_purge_time_retention(&self) -> Result<()> { + info!("Checking if any media should be deleted due to time-based retention policies"); + let files = self + .db + .cleanup_time_retention(&services().globals.config.media.retention); + + let count = files.iter().filter(|res| res.is_ok()).count(); + info!("Found {count} media files to delete"); + + purge_files(files); + + Ok(()) + } + /// Uploads a file. pub async fn create( &self, @@ -59,6 +110,16 @@ impl Service { ) -> Result<()> { let (sha256_digest, sha256_hex) = generate_digests(file); + for error in self.clear_required_space( + &sha256_digest, + MediaType::new(servername, false), + size(file)?, + )? 
{ + error!( + "Error deleting file to clear space when downloading/creating new media file: {error}" + ) + } + self.db.create_file_metadata( sha256_digest, size(file)?, @@ -93,6 +154,12 @@ impl Service { ) -> Result<()> { let (sha256_digest, sha256_hex) = generate_digests(file); + self.clear_required_space( + &sha256_digest, + MediaType::new(servername, true), + size(file)?, + )?; + self.db.create_thumbnail_metadata( sha256_digest, size(file)?, @@ -125,7 +192,7 @@ impl Service { return Ok(None); } - let file = get_file(&hex::encode(sha256_digest)).await?; + let file = self.get_file(&sha256_digest, None).await?; Ok(Some(FileMeta { content_disposition: content_disposition(filename, &content_type), @@ -180,7 +247,9 @@ impl Service { } // Using saved thumbnail - let file = get_file(&hex::encode(sha256_digest)).await?; + let file = self + .get_file(&sha256_digest, Some((servername, media_id))) + .await?; Ok(Some(FileMeta { content_disposition: content_disposition(filename, &content_type), @@ -202,7 +271,7 @@ impl Service { let content_disposition = content_disposition(filename.clone(), &content_type); // Generate a thumbnail - let file = get_file(&hex::encode(sha256_digest)).await?; + let file = self.get_file(&sha256_digest, None).await?; if let Ok(image) = image::load_from_memory(&file) { let original_width = image.width(); @@ -303,7 +372,7 @@ impl Service { return Ok(None); } - let file = get_file(&hex::encode(sha256_digest)).await?; + let file = self.get_file(&sha256_digest, None).await?; Ok(Some(FileMeta { content_disposition: content_disposition(filename, &content_type), @@ -416,14 +485,73 @@ impl Service { pub fn list_blocked(&self) -> Vec> { self.db.list_blocked() } + + pub fn clear_required_space( + &self, + sha256_digest: &[u8], + media_type: MediaType, + new_size: u64, + ) -> Result> { + let files = self.db.files_to_delete( + sha256_digest, + &services().globals.config.media.retention, + media_type, + new_size, + )?; + + let count = files.iter().filter(|r| r.is_ok()).count(); + + if count != 0 { + info!("Deleting {} files to clear space for new media file", count); + } + + Ok(purge_files(files)) + } + + /// Fetches the file from the configured media backend, as well as updating the "last accessed" + /// part of the metadata of the file + /// + /// If specified, the original file will also have it's last accessed time updated, if present + /// (use when accessing thumbnails) + async fn get_file( + &self, + sha256_digest: &[u8], + original_file_id: Option<(&ServerName, &str)>, + ) -> Result> { + let file = match &services().globals.config.media.backend { + MediaBackendConfig::FileSystem { + path, + directory_structure, + } => { + let path = services().globals.get_media_path( + path, + directory_structure, + &hex::encode(sha256_digest), + )?; + + let mut file = Vec::new(); + File::open(path).await?.read_to_end(&mut file).await?; + + file + } + }; + + if let Some((server_name, media_id)) = original_file_id { + self.db.update_last_accessed(server_name, media_id)?; + } + + self.db + .update_last_accessed_filehash(sha256_digest) + .map(|_| file) + } } /// Creates the media file, using the configured media backend /// /// Note: this function does NOT set the metadata related to the file pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> { - match &services().globals.config.media { - MediaConfig::FileSystem { + match &services().globals.config.media.backend { + MediaBackendConfig::FileSystem { path, directory_structure, } => { @@ -439,25 +567,6 @@ pub async fn 
create_file(sha256_hex: &str, file: &[u8]) -> Result<()> { Ok(()) } -/// Fetches the file from the configured media backend -async fn get_file(sha256_hex: &str) -> Result> { - Ok(match &services().globals.config.media { - MediaConfig::FileSystem { - path, - directory_structure, - } => { - let path = services() - .globals - .get_media_path(path, directory_structure, sha256_hex)?; - - let mut file = Vec::new(); - File::open(path).await?.read_to_end(&mut file).await?; - - file - } - }) -} - /// Purges the given files from the media backend /// Returns a `Vec` of errors that occurred when attempting to delete the files /// @@ -477,8 +586,8 @@ fn purge_files(hashes: Vec>) -> Vec { /// /// Note: this does NOT remove the related metadata from the database fn delete_file(sha256_hex: &str) -> Result<()> { - match &services().globals.config.media { - MediaConfig::FileSystem { + match &services().globals.config.media.backend { + MediaBackendConfig::FileSystem { path, directory_structure, } => { diff --git a/src/service/mod.rs b/src/service/mod.rs index c328bf7e..832ca8ae 100644 --- a/src/service/mod.rs +++ b/src/service/mod.rs @@ -34,7 +34,7 @@ pub struct Services { pub admin: Arc, pub globals: globals::Service, pub key_backups: key_backups::Service, - pub media: media::Service, + pub media: Arc, pub sending: Arc, } @@ -119,7 +119,7 @@ impl Services { account_data: account_data::Service { db }, admin: admin::Service::build(), key_backups: key_backups::Service { db }, - media: media::Service { db }, + media: Arc::new(media::Service { db }), sending: sending::Service::build(db, &config), globals: globals::Service::load(db, config)?, From fd16e9c509c74b5a4b0148d07194f50fe68b8b58 Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Tue, 6 May 2025 00:36:32 +0100 Subject: [PATCH 12/15] feat(admin): list & query information about media --- src/database/key_value/media.rs | 461 +++++++++++++++++++++++++++++++- src/service/admin/mod.rs | 227 +++++++++++++++- src/service/media/data.rs | 15 +- src/service/media/mod.rs | 72 +++++ 4 files changed, 772 insertions(+), 3 deletions(-) diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 27a239fd..695c7d3c 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -10,7 +10,10 @@ use crate::{ database::KeyValueDatabase, service::{ self, - media::{BlockedMediaInfo, Data as _, DbFileMeta, MediaType}, + media::{ + BlockedMediaInfo, Data as _, DbFileMeta, FileInfo, MediaListItem, MediaQuery, + MediaQueryFileInfo, MediaQueryThumbInfo, MediaType, ServerNameOrUserId, + }, }, services, utils, Error, Result, }; @@ -164,6 +167,117 @@ impl service::media::Data for KeyValueDatabase { .ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found.")) } + fn query(&self, server_name: &ServerName, media_id: &str) -> Result { + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id.as_bytes()); + + Ok(MediaQuery { + is_blocked: self.is_directly_blocked(server_name, media_id)?, + source_file: if let Some(DbFileMeta { + sha256_digest, + filename, + content_type, + unauthenticated_access_permitted, + }) = self + .servernamemediaid_metadata + .get(&key)? + .as_deref() + .map(parse_metadata) + .transpose()? + { + let sha256_hex = hex::encode(&sha256_digest); + + let uploader_localpart = self + .servernamemediaid_userlocalpart + .get(&key)? 
+ .as_deref() + .map(utils::string_from_bytes) + .transpose() + .map_err(|_| { + error!("Invalid UTF-8 for uploader of mxc://{server_name}/{media_id}"); + Error::BadDatabase( + "Invalid UTF-8 in value of servernamemediaid_userlocalpart", + ) + })?; + + let is_blocked_via_filehash = self.is_blocked_filehash(&sha256_digest)?; + + let time_info = if let Some(filehash_meta) = self + .filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + { + Some(FileInfo { + creation: filehash_meta.creation(&sha256_digest)?, + last_access: filehash_meta.last_access(&sha256_digest)?, + size: filehash_meta.size(&sha256_digest)?, + }) + } else { + None + }; + + Some(MediaQueryFileInfo { + uploader_localpart, + sha256_hex, + filename, + content_type, + unauthenticated_access_permitted, + is_blocked_via_filehash, + file_info: time_info, + }) + } else { + None + }, + thumbnails: { + key.push(0xff); + + self.thumbnailid_metadata + .scan_prefix(key) + .map(|(k, v)| { + let (width, height) = dimensions_from_thumbnailid(&k)?; + + let DbFileMeta { + sha256_digest, + filename, + content_type, + unauthenticated_access_permitted, + } = parse_metadata(&v)?; + + let sha256_hex = hex::encode(&sha256_digest); + + let is_blocked_via_filehash = self.is_blocked_filehash(&sha256_digest)?; + + let time_info = if let Some(filehash_meta) = self + .filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + { + Some(FileInfo { + creation: filehash_meta.creation(&sha256_digest)?, + last_access: filehash_meta.last_access(&sha256_digest)?, + size: filehash_meta.size(&sha256_digest)?, + }) + } else { + None + }; + + Ok(MediaQueryThumbInfo { + width, + height, + sha256_hex, + filename, + content_type, + unauthenticated_access_permitted, + is_blocked_via_filehash, + file_info: time_info, + }) + }) + .collect::>()? + }, + }) + } + fn purge_and_get_hashes( &self, media: &[(OwnedServerName, String)], @@ -644,6 +758,336 @@ impl service::media::Data for KeyValueDatabase { errors } + fn list( + &self, + server_name_or_user_id: Option, + include_thumbnails: bool, + content_type: Option<&str>, + before: Option, + after: Option, + ) -> Result> { + let filter_medialistitem = |item: MediaListItem| { + if content_type.is_none_or(|ct_filter| { + item.content_type + .as_deref() + .map(|item_ct| { + if ct_filter.bytes().any(|char| char == b'/') { + item_ct == ct_filter + } else { + item_ct.starts_with(&(ct_filter.to_owned() + "/")) + } + }) + .unwrap_or_default() + }) && before.is_none_or(|before| item.creation < before) + && after.is_none_or(|after| item.creation > after) + { + Some(item) + } else { + None + } + }; + + let parse_servernamemediaid_metadata_iter = + |v: &[u8], next_part: Option<&[u8]>, server_name: &ServerName| { + let media_id_bytes = next_part.ok_or_else(|| { + Error::bad_database("Invalid format of key in servernamemediaid_metadata") + })?; + let media_id = utils::string_from_bytes(media_id_bytes).map_err(|_| { + Error::bad_database("Invalid Media ID String in servernamemediaid_metadata") + })?; + + let mut key = server_name.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id_bytes); + + let uploader_localpart = self + .servernamemediaid_userlocalpart + .get(&key)? 
+ .as_deref() + .map(utils::string_from_bytes) + .transpose() + .map_err(|_| { + error!("Invalid localpart of uploader for mxc://{server_name}/{media_id}"); + Error::BadDatabase( + "Invalid uploader localpart in servernamemediaid_userlocalpart", + ) + })?; + + let DbFileMeta { + sha256_digest, + filename, + content_type, + .. + } = parse_metadata(v)?; + + self.filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + .map(|meta| { + Ok(filter_medialistitem(MediaListItem { + server_name: server_name.to_owned(), + media_id: media_id.clone(), + uploader_localpart, + content_type, + filename, + dimensions: None, + size: meta.size(&sha256_digest)?, + creation: meta.creation(&sha256_digest)?, + })) + }) + .transpose() + .map(Option::flatten) + }; + + let parse_thumbnailid_metadata_iter = + |k: &[u8], v: &[u8], media_id_part: Option<&[u8]>, server_name: &ServerName| { + let media_id_bytes = media_id_part.ok_or_else(|| { + Error::bad_database("Invalid format of key in servernamemediaid_metadata") + })?; + let media_id = utils::string_from_bytes(media_id_bytes).map_err(|_| { + Error::bad_database("Invalid Media ID String in servernamemediaid_metadata") + })?; + + let dimensions = dimensions_from_thumbnailid(k)?; + + let DbFileMeta { + sha256_digest, + filename, + content_type, + .. + } = parse_metadata(v)?; + + self.filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + .map(|meta| { + Ok(filter_medialistitem(MediaListItem { + server_name: server_name.to_owned(), + media_id, + uploader_localpart: None, + content_type, + filename, + dimensions: Some(dimensions), + size: meta.size(&sha256_digest)?, + creation: meta.creation(&sha256_digest)?, + })) + }) + .transpose() + .map(Option::flatten) + }; + + match server_name_or_user_id { + Some(ServerNameOrUserId::ServerName(server_name)) => { + let mut prefix = server_name.as_bytes().to_vec(); + prefix.push(0xff); + + let mut media = self + .servernamemediaid_metadata + .scan_prefix(prefix.clone()) + .map(|(k, v)| { + let mut parts = k.rsplit(|b: &u8| *b == 0xff); + + parse_servernamemediaid_metadata_iter(&v, parts.next(), &server_name) + }) + .filter_map(Result::transpose) + .collect::>>()?; + + if include_thumbnails { + media.append( + &mut self + .thumbnailid_metadata + .scan_prefix(prefix) + .map(|(k, v)| { + let mut parts = k.split(|b: &u8| *b == 0xff); + parts.next(); + + parse_thumbnailid_metadata_iter(&k, &v, parts.next(), &server_name) + }) + .filter_map(Result::transpose) + .collect::>>()?, + ); + } + + Ok(media) + } + Some(ServerNameOrUserId::UserId(user_id)) => { + let mut prefix = user_id.server_name().as_bytes().to_vec(); + prefix.push(0xff); + prefix.extend_from_slice(user_id.localpart().as_bytes()); + prefix.push(0xff); + + self.servername_userlocalpart_mediaid + .scan_prefix(prefix) + .map(|(k, _)| -> Result<_> { + let mut parts = k.rsplit(|b: &u8| *b == 0xff); + + let media_id_bytes = parts.next().ok_or_else(|| { + Error::bad_database( + "Invalid format of key in servername_userlocalpart_mediaid", + ) + })?; + let media_id = utils::string_from_bytes(media_id_bytes).map_err(|_| { + Error::bad_database( + "Invalid Media ID String in servername_userlocalpart_mediaid", + ) + })?; + + let mut key = user_id.server_name().as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(media_id_bytes); + + let Some(DbFileMeta { + sha256_digest, + filename, + content_type, + .. + }) = self + .servernamemediaid_metadata + .get(&key)? + .as_deref() + .map(parse_metadata) + .transpose()? 
+ else { + error!( + "Missing metadata for \"mxc://{}/{media_id}\", despite storing it's uploader", + user_id.server_name() + ); + return Err(Error::BadDatabase( + "Missing metadata for media, despite storing it's uploader", + )); + }; + + let mut media = if let Some(item) = self + .filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + .map(|meta| { + Ok::<_, Error>(filter_medialistitem(MediaListItem { + server_name: user_id.server_name().to_owned(), + media_id: media_id.clone(), + uploader_localpart: Some(user_id.localpart().to_owned()), + content_type, + filename, + dimensions: None, + size: meta.size(&sha256_digest)?, + creation: meta.creation(&sha256_digest)?, + })) + }) + .transpose()? + .flatten() + { + vec![item] + } else { + Vec::new() + }; + + if include_thumbnails { + key.push(0xff); + + media.append( + &mut self + .thumbnailid_metadata + .scan_prefix(key) + .map(|(k, v)| { + let DbFileMeta { + sha256_digest, + filename, + content_type, + .. + } = parse_metadata(&v)?; + + let dimensions = dimensions_from_thumbnailid(&k)?; + + self.filehash_metadata + .get(&sha256_digest)? + .map(FilehashMetadata::from_vec) + .map(|meta| { + Ok(filter_medialistitem(MediaListItem { + server_name: user_id.server_name().to_owned(), + media_id: media_id.clone(), + uploader_localpart: Some( + user_id.localpart().to_owned(), + ), + content_type, + filename, + dimensions: Some(dimensions), + size: meta.size(&sha256_digest)?, + creation: meta.creation(&sha256_digest)?, + })) + }) + .transpose() + .map(Option::flatten) + }) + .filter_map(Result::transpose) + .collect::>>()?, + ); + }; + + Ok(media) + }) + .collect::>>>() + .map(|outer| outer.into_iter().flatten().collect::>()) + } + None => { + let splitter = |b: &u8| *b == 0xff; + + let get_servername = |parts: &mut Split<'_, u8, _>| -> Result<_> { + let server_name = parts + .next() + .ok_or_else(|| { + Error::bad_database( + "Invalid format of key in servernamemediaid_metadata", + ) + }) + .map(utils::string_from_bytes)? + .map_err(|_| { + Error::bad_database( + "Invalid ServerName String in servernamemediaid_metadata", + ) + }) + .map(OwnedServerName::try_from)? + .map_err(|_| { + Error::bad_database( + "Invalid ServerName String in servernamemediaid_metadata", + ) + })?; + + Ok(server_name) + }; + + let mut media = self + .servernamemediaid_metadata + .iter() + .map(|(k, v)| { + let mut parts = k.split(splitter); + let server_name = get_servername(&mut parts)?; + + parse_servernamemediaid_metadata_iter(&v, parts.next(), &server_name) + }) + .filter_map(Result::transpose) + .collect::>>()?; + + if include_thumbnails { + media.append( + &mut self + .thumbnailid_metadata + .iter() + .map(|(k, v)| { + let mut parts = k.split(splitter); + let server_name = get_servername(&mut parts)?; + + parse_thumbnailid_metadata_iter(&k, &v, parts.next(), &server_name) + }) + .filter_map(Result::transpose) + .collect::>>()?, + ); + } + + Ok(media) + } + } + } + fn list_blocked(&self) -> Vec> { let parse_servername = |parts: &mut Split<_, _>| { OwnedServerName::try_from( @@ -1216,6 +1660,21 @@ fn parse_metadata(value: &[u8]) -> Result { }) } +/// Attempts to parse the width and height from a "thumbnail id", returning the +/// width and height in that order +fn dimensions_from_thumbnailid(thumbnail_id: &[u8]) -> Result<(u32, u32)> { + let (width, height) = thumbnail_id[thumbnail_id + .len() + .checked_sub(8) + .ok_or_else(|| Error::BadDatabase("Invalid format of dimensions from thumbnailid"))?..] 
+        .split_at(4);
+
+    Ok((
+        u32::from_be_bytes(width.try_into().expect("Length of slice is 4")),
+        u32::from_be_bytes(height.try_into().expect("Length of slice is 4")),
+    ))
+}
+
 pub struct FilehashMetadata {
     value: Vec<u8>,
 }
diff --git a/src/service/admin/mod.rs b/src/service/admin/mod.rs
index 2044c0ad..06e6d047 100644
--- a/src/service/admin/mod.rs
+++ b/src/service/admin/mod.rs
@@ -6,6 +6,7 @@ use std::{
     time::{Duration, Instant, SystemTime, UNIX_EPOCH},
 };
 
+use bytesize::ByteSize;
 use chrono::DateTime;
 use clap::{Args, Parser};
 use regex::Regex;
@@ -39,7 +40,13 @@ use crate::{
     Error, PduEvent, Result,
 };
 
-use super::{media::BlockedMediaInfo, pdu::PduBuilder};
+use super::{
+    media::{
+        BlockedMediaInfo, FileInfo, MediaListItem, MediaQuery, MediaQueryFileInfo,
+        MediaQueryThumbInfo, ServerNameOrUserId,
+    },
+    pdu::PduBuilder,
+};
 
 #[cfg_attr(test, derive(Debug))]
 #[derive(Parser)]
@@ -125,6 +132,52 @@ enum AdminCommand {
         purge_media: DeactivatePurgeMediaArgs,
     },
 
+    /// Shows information about the requested media
+    QueryMedia {
+        /// The MXC URI of the media you want to request information about
+        mxc: Box<MxcUri>,
+    },
+
+    /// Lists all the media matching the specified requirements
+    ListMedia {
+        #[command(flatten)]
+        user_server_filter: ListMediaArgs,
+
+        /// Whether to include thumbnails in the list.
+        /// It is recommended to do so if you are not only looking
+        /// for local media, as with remote media, the full media file
+        /// might not be downloaded, just the thumbnail
+        #[arg(short = 't', long)]
+        include_thumbnails: bool,
+
+        #[arg(short, long)]
+        /// The content-type media must have to be listed.
+        /// If only a "type" (as opposed to "type/subtype") is specified,
+        /// all media with that type are returned, no matter the sub-type.
+        ///
+        /// For example, if you request content-type "image", then files
+        /// of content type "image/png", "image/jpeg", etc. will be returned.
+        content_type: Option<String>,
+
+        #[arg(
+            short = 'b', long,
+            value_parser = humantime::parse_rfc3339_weak
+        )]
+        /// The point in time before which media had to be uploaded to be
+        /// shown (in the UTC timezone).
+        /// Should be in the format {YYYY}-{MM}-{DD}T{hh}:{mm}:{ss}
+        uploaded_before: Option<SystemTime>,
+
+        #[arg(
+            short = 'a', long,
+            value_parser = humantime::parse_rfc3339_weak
+        )]
+        /// The point in time after which media had to be uploaded to be
+        /// shown (in the UTC timezone).
+        /// Should be in the format {YYYY}-{MM}-{DD}T{hh}:{mm}:{ss}
+        uploaded_after: Option<SystemTime>,
+    },
+
     /// Purge a list of media, formatted as MXC URIs
     /// There should be one URI per line, all contained within a code-block
     ///
@@ -331,6 +384,22 @@ pub struct DeactivatePurgeMediaArgs {
     force_filehash: bool,
 }
 
+#[derive(Args, Debug)]
+#[group(required = false)]
+pub struct ListMediaArgs {
+    #[arg(short, long)]
+    /// The user that uploaded the media.
+    /// Only local media uploaders can be recorded, so specifying a non-local
+    /// user will always yield no results
+    user: Option<Box<UserId>>,
+
+    #[arg(short, long)]
+    /// The server the media originated from.
+ /// If you want to list local media, just set this to + /// be your own server's servername + server: Option>, +} + #[derive(Debug)] pub enum AdminRoomEvent { ProcessMessage(String), @@ -960,6 +1029,162 @@ impl Service { ) } } + AdminCommand::QueryMedia { mxc } => { + let Ok((server_name, media_id)) = mxc.parts() else { + return Ok(RoomMessageEventContent::text_plain("Invalid media MXC")); + }; + + let MediaQuery{ is_blocked, source_file, thumbnails } = services().media.query(server_name, media_id)?; + let mut message = format!("Is blocked Media ID: {is_blocked}"); + + if let Some(MediaQueryFileInfo { + uploader_localpart, + sha256_hex, + filename, + content_type, + unauthenticated_access_permitted, + is_blocked_via_filehash, + file_info: time_info, + }) = source_file { + message.push_str("\n\nInformation on full (non-thumbnail) file:\n"); + + if let Some(FileInfo { + creation, + last_access, + size, + }) = time_info { + message.push_str(&format!("\nIs stored: true\nCreated at: {}\nLast accessed at: {}\nSize of file: {}", + DateTime::from_timestamp(creation.try_into().unwrap_or(i64::MAX),0).expect("Timestamp is within range"), + DateTime::from_timestamp(last_access.try_into().unwrap_or(i64::MAX),0).expect("Timestamp is within range"), + ByteSize::b(size).display().si() + )); + } else { + message.push_str("\nIs stored: false"); + } + + message.push_str(&format!("\nIs accessible via unauthenticated media endpoints: {unauthenticated_access_permitted}")); + message.push_str(&format!("\nSHA256 hash of file: {sha256_hex}")); + message.push_str(&format!("\nIs blocked due to sharing SHA256 hash with blocked media: {is_blocked_via_filehash}")); + + if let Some(localpart) = uploader_localpart { + message.push_str(&format!("\nUploader: @{localpart}:{server_name}")) + } + if let Some(filename) = filename { + message.push_str(&format!("\nFilename: {filename}")) + } + if let Some(content_type) = content_type { + message.push_str(&format!("\nContent-type: {content_type}")) + } + } + + if !thumbnails.is_empty() { + message.push_str("\n\nInformation on thumbnails of media:"); + } + + for MediaQueryThumbInfo{ + width, + height, + sha256_hex, + filename, + content_type, + unauthenticated_access_permitted, + is_blocked_via_filehash, + file_info: time_info, + } in thumbnails { + message.push_str(&format!("\n\nDimensions: {width}x{height}")); + if let Some(FileInfo { + creation, + last_access, + size, + }) = time_info { + message.push_str(&format!("\nIs stored: true\nCreated at: {}\nLast accessed at: {}\nSize of file: {}", + DateTime::from_timestamp(creation.try_into().unwrap_or(i64::MAX),0).expect("Timestamp is within range"), + DateTime::from_timestamp(last_access.try_into().unwrap_or(i64::MAX),0).expect("Timestamp is within range"), + ByteSize::b(size).display().si() + )); + } else { + message.push_str("\nIs stored: false"); + } + + message.push_str(&format!("\nIs accessible via unauthenticated media endpoints: {unauthenticated_access_permitted}")); + message.push_str(&format!("\nSHA256 hash of file: {sha256_hex}")); + message.push_str(&format!("\nIs blocked due to sharing SHA256 hash with blocked media: {is_blocked_via_filehash}")); + + if let Some(filename) = filename { + message.push_str(&format!("\nFilename: {filename}")) + } + if let Some(content_type) = content_type { + message.push_str(&format!("\nContent-type: {content_type}")) + } + } + + RoomMessageEventContent::text_plain(message) + } + AdminCommand::ListMedia { + user_server_filter: ListMediaArgs { + user, + server, + }, + include_thumbnails, + 
+                content_type,
+                uploaded_before,
+                uploaded_after,
+            } => {
+                let mut markdown_message = String::from(
+                    "| MXC URI | Dimensions (if thumbnail) | Created/Downloaded at | Uploader | Content-Type | Filename | Size |\n| --- | --- | --- | --- | --- | --- | --- |",
+                );
+                let mut html_message = String::from(
+                    r#"<table><thead><tr><th>MXC URI</th><th>Dimensions (if thumbnail)</th><th>Created/Downloaded at</th><th>Uploader</th><th>Content-Type</th><th>Filename</th><th>Size</th></tr></thead><tbody>"#,
+                );
+
+                for MediaListItem{
+                    server_name,
+                    media_id,
+                    uploader_localpart,
+                    content_type,
+                    filename,
+                    dimensions,
+                    size,
+                    creation,
+                } in services().media.list(
+                    user
+                        .map(ServerNameOrUserId::UserId)
+                        .or_else(|| server.map(ServerNameOrUserId::ServerName)),
+                    include_thumbnails,
+                    content_type.as_deref(),
+                    uploaded_before
+                        .map(|ts| ts.duration_since(UNIX_EPOCH))
+                        .transpose()
+                        .map_err(|_| Error::AdminCommand("Timestamp must be after unix epoch"))?
+                        .as_ref()
+                        .map(Duration::as_secs),
+                    uploaded_after
+                        .map(|ts| ts.duration_since(UNIX_EPOCH))
+                        .transpose()
+                        .map_err(|_| Error::AdminCommand("Timestamp must be after unix epoch"))?
+                        .as_ref()
+                        .map(Duration::as_secs)
+                )? {
+
+                    let user_id = uploader_localpart.map(|localpart| format!("@{localpart}:{server_name}")).unwrap_or_default();
+                    let content_type = content_type.unwrap_or_default();
+                    let filename = filename.unwrap_or_default();
+                    let dimensions = dimensions.map(|(w, h)| format!("{w}x{h}")).unwrap_or_default();
+                    let size = ByteSize::b(size).display().si();
+                    let creation = DateTime::from_timestamp(creation.try_into().unwrap_or(i64::MAX),0).expect("Timestamp is within range");
+
+                    markdown_message
+                        .push_str(&format!("\n| mxc://{server_name}/{media_id} | {dimensions} | {creation} | {user_id} | {content_type} | {filename} | {size} |"));
+
+                    html_message.push_str(&format!(
+                        "<tr><td>mxc://{server_name}/{media_id}</td><td>{dimensions}</td><td>{creation}</td><td>{user_id}</td><td>{content_type}</td><td>{filename}</td><td>{size}</td></tr>"
+                    ))
+                }
+
+                html_message.push_str("</tbody></table>
"); + + RoomMessageEventContent::text_html(markdown_message, html_message) + }, AdminCommand::PurgeMedia => media_from_body(body).map_or_else( |message| message, |media| { diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 9f1d48c9..444f5f9a 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -3,7 +3,9 @@ use sha2::{digest::Output, Sha256}; use crate::{config::MediaRetentionConfig, Error, Result}; -use super::{BlockedMediaInfo, DbFileMeta, MediaType}; +use super::{ + BlockedMediaInfo, DbFileMeta, MediaListItem, MediaQuery, MediaType, ServerNameOrUserId, +}; pub trait Data: Send + Sync { #[allow(clippy::too_many_arguments)] @@ -44,6 +46,8 @@ pub trait Data: Send + Sync { height: u32, ) -> Result; + fn query(&self, server_name: &ServerName, media_id: &str) -> Result; + fn purge_and_get_hashes( &self, media: &[(OwnedServerName, String)], @@ -83,6 +87,15 @@ pub trait Data: Send + Sync { fn unblock(&self, media: &[(OwnedServerName, String)]) -> Vec; + fn list( + &self, + server_name_or_user_id: Option, + include_thumbnails: bool, + content_type: Option<&str>, + before: Option, + after: Option, + ) -> Result>; + /// Returns a Vec of: /// - The server the media is from /// - The media id diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index a26e615d..16060dfb 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -28,6 +28,55 @@ use tokio::{ io::{AsyncReadExt, AsyncWriteExt}, }; +pub struct MediaQuery { + pub is_blocked: bool, + pub source_file: Option, + pub thumbnails: Vec, +} + +pub struct MediaQueryFileInfo { + pub uploader_localpart: Option, + pub sha256_hex: String, + pub filename: Option, + pub content_type: Option, + pub unauthenticated_access_permitted: bool, + pub is_blocked_via_filehash: bool, + pub file_info: Option, +} + +pub struct MediaQueryThumbInfo { + pub width: u32, + pub height: u32, + pub sha256_hex: String, + pub filename: Option, + pub content_type: Option, + pub unauthenticated_access_permitted: bool, + pub is_blocked_via_filehash: bool, + pub file_info: Option, +} + +pub struct FileInfo { + pub creation: u64, + pub last_access: u64, + pub size: u64, +} + +pub struct MediaListItem { + pub server_name: OwnedServerName, + pub media_id: String, + pub uploader_localpart: Option, + pub content_type: Option, + pub filename: Option, + pub dimensions: Option<(u32, u32)>, + pub size: u64, + pub creation: u64, +} + +pub enum ServerNameOrUserId { + ServerName(Box), + UserId(Box), +} + pub struct FileMeta { pub content_disposition: ContentDisposition, pub content_type: Option, @@ -382,6 +431,11 @@ impl Service { } } + /// Returns information about the queried media + pub fn query(&self, server_name: &ServerName, media_id: &str) -> Result { + self.db.query(server_name, media_id) + } + /// Purges all of the specified media. 
/// /// If `force_filehash` is true, all media and/or thumbnails which share sha256 content hashes @@ -477,6 +531,24 @@ impl Service { self.db.unblock(media) } + /// Returns a list of all the stored media, applying all the given filters to the results + pub fn list( + &self, + server_name_or_user_id: Option, + include_thumbnails: bool, + content_type: Option<&str>, + before: Option, + after: Option, + ) -> Result> { + self.db.list( + server_name_or_user_id, + include_thumbnails, + content_type, + before, + after, + ) + } + /// Returns a Vec of: /// - The server the media is from /// - The media id From a189b66ca6eaeef2c51cd249c7074a06f1ec27ec Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Tue, 6 May 2025 00:47:20 +0100 Subject: [PATCH 13/15] feat(admin): show media command --- Cargo.lock | 27 ++++- Cargo.toml | 1 + src/api/client_server/media.rs | 2 +- src/api/client_server/mod.rs | 2 +- src/service/admin/mod.rs | 175 ++++++++++++++++++++++----------- 5 files changed, 145 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9bd25569..af950d48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -396,6 +396,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.6.0" @@ -1401,12 +1407,23 @@ dependencies = [ "byteorder", "color_quant", "gif", + "image-webp", "num-traits", "png", "zune-core", "zune-jpeg", ] +[[package]] +name = "image-webp" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f79afb8cbee2ef20f59ccd477a218c12a93943d075b492015ecb1bb81f8ee904" +dependencies = [ + "byteorder-lite", + "quick-error 2.0.1", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -2107,6 +2124,12 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + [[package]] name = "quote" version = "1.0.36" @@ -2259,7 +2282,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" dependencies = [ "hostname", - "quick-error", + "quick-error 1.2.3", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 511a6f4c..f883be9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,6 +84,7 @@ image = { version = "0.25", default-features = false, features = [ "gif", "jpeg", "png", + "webp", ] } # Used for creating media filenames hex = "0.4" diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index d4c8738d..4b55a511 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -193,7 +193,7 @@ pub async fn get_content_auth_route( get_content(&body.server_name, body.media_id.clone(), true, true).await } -async fn get_content( +pub async fn get_content( server_name: &ServerName, media_id: String, allow_remote: 
bool, diff --git a/src/api/client_server/mod.rs b/src/api/client_server/mod.rs index e5d0a5d5..e99bc048 100644 --- a/src/api/client_server/mod.rs +++ b/src/api/client_server/mod.rs @@ -9,7 +9,7 @@ mod device; mod directory; mod filter; mod keys; -mod media; +pub mod media; mod membership; mod message; mod openid; diff --git a/src/service/admin/mod.rs b/src/service/admin/mod.rs index 06e6d047..ba58044a 100644 --- a/src/service/admin/mod.rs +++ b/src/service/admin/mod.rs @@ -9,6 +9,7 @@ use std::{ use bytesize::ByteSize; use chrono::DateTime; use clap::{Args, Parser}; +use image::GenericImageView; use regex::Regex; use ruma::{ api::appservice::Registration, @@ -20,21 +21,25 @@ use ruma::{ history_visibility::{HistoryVisibility, RoomHistoryVisibilityEventContent}, join_rules::{JoinRule, RoomJoinRulesEventContent}, member::{MembershipState, RoomMemberEventContent}, - message::RoomMessageEventContent, + message::{ + FileMessageEventContent, ImageMessageEventContent, MessageType, + RoomMessageEventContent, + }, name::RoomNameEventContent, power_levels::RoomPowerLevelsEventContent, topic::RoomTopicEventContent, + MediaSource, }, TimelineEventType, }, - EventId, MilliSecondsSinceUnixEpoch, MxcUri, OwnedRoomAliasId, OwnedRoomId, OwnedServerName, - RoomAliasId, RoomId, RoomVersionId, ServerName, UserId, + EventId, MilliSecondsSinceUnixEpoch, MxcUri, OwnedMxcUri, OwnedRoomAliasId, OwnedRoomId, + OwnedServerName, RoomAliasId, RoomId, RoomVersionId, ServerName, UserId, }; use serde_json::value::to_raw_value; use tokio::sync::{mpsc, Mutex, RwLock}; use crate::{ - api::client_server::{leave_all_rooms, AUTO_GEN_PASSWORD_LENGTH}, + api::client_server::{self, leave_all_rooms, AUTO_GEN_PASSWORD_LENGTH}, services, utils::{self, HtmlEscape}, Error, PduEvent, Result, @@ -42,7 +47,7 @@ use crate::{ use super::{ media::{ - BlockedMediaInfo, FileInfo, MediaListItem, MediaQuery, MediaQueryFileInfo, + size, BlockedMediaInfo, FileInfo, MediaListItem, MediaQuery, MediaQueryFileInfo, MediaQueryThumbInfo, ServerNameOrUserId, }, pdu::PduBuilder, @@ -138,6 +143,12 @@ enum AdminCommand { mxc: Box, }, + /// Sends a message with the requested media attached, so that you can view it easily + ShowMedia { + /// The MXC URI of the media you want to view + mxc: Box, + }, + /// Lists all the media matching the specified requirements ListMedia { #[command(flatten)] @@ -439,8 +450,8 @@ impl Service { tokio::select! 
{ Some(event) = receiver.recv() => { let message_content = match event { - AdminRoomEvent::SendMessage(content) => content, - AdminRoomEvent::ProcessMessage(room_message) => self.process_admin_message(room_message).await + AdminRoomEvent::SendMessage(content) => content.into(), + AdminRoomEvent::ProcessMessage(room_message) => self.process_admin_message(room_message).await, }; let mutex_state = Arc::clone( @@ -491,7 +502,7 @@ impl Service { } // Parse and process a message from the admin room - async fn process_admin_message(&self, room_message: String) -> RoomMessageEventContent { + async fn process_admin_message(&self, room_message: String) -> MessageType { let mut lines = room_message.lines().filter(|l| !l.trim().is_empty()); let command_line = lines.next().expect("each string has at least one line"); let body: Vec<_> = lines.collect(); @@ -503,7 +514,7 @@ impl Service { let message = error.replace("server.name", server_name.as_str()); let html_message = self.usage_to_html(&message, server_name); - return RoomMessageEventContent::text_html(message, html_message); + return RoomMessageEventContent::text_html(message, html_message).into(); } }; @@ -519,7 +530,7 @@ impl Service {
<pre>\n{error}\n</pre>
", ); - RoomMessageEventContent::text_html(markdown_message, html_message) + RoomMessageEventContent::text_html(markdown_message, html_message).into() } } } @@ -550,7 +561,7 @@ impl Service { &self, command: AdminCommand, body: Vec<&str>, - ) -> Result { + ) -> Result { let reply_message_content = match command { AdminCommand::RegisterAppservice => { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" @@ -574,7 +585,7 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. Add --help for details.", ) - } + }.into() } AdminCommand::UnregisterAppservice { appservice_identifier, @@ -587,7 +598,7 @@ impl Service { Err(e) => RoomMessageEventContent::text_plain(format!( "Failed to unregister appservice: {e}" )), - }, + }.into(), AdminCommand::ListAppservices => { let appservices = services().appservice.iter_ids().await; let output = format!( @@ -595,7 +606,7 @@ impl Service { appservices.len(), appservices.join(", ") ); - RoomMessageEventContent::text_plain(output) + RoomMessageEventContent::text_plain(output).into() } AdminCommand::ListRooms => { let room_ids = services().rooms.metadata.iter_ids(); @@ -616,7 +627,7 @@ impl Service { .collect::>() .join("\n") ); - RoomMessageEventContent::text_plain(output) + RoomMessageEventContent::text_plain(output).into() } AdminCommand::ListLocalUsers => match services().users.list_local_users() { Ok(users) => { @@ -625,7 +636,7 @@ impl Service { RoomMessageEventContent::text_plain(&msg) } Err(e) => RoomMessageEventContent::text_plain(e.to_string()), - }, + }.into(), AdminCommand::IncomingFederation => { let map = services().globals.roomid_federationhandletime.read().await; let mut msg: String = format!("Handling {} incoming pdus:\n", map.len()); @@ -640,7 +651,7 @@ impl Service { elapsed.as_secs() % 60 ); } - RoomMessageEventContent::text_plain(&msg) + RoomMessageEventContent::text_plain(&msg).into() } AdminCommand::GetAuthChain { event_id } => { let event_id = Arc::::from(event_id); @@ -666,7 +677,7 @@ impl Service { )) } else { RoomMessageEventContent::text_plain("Event not found.") - } + }.into() } AdminCommand::ParsePdu => { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" @@ -700,7 +711,7 @@ impl Service { } } else { RoomMessageEventContent::text_plain("Expected code block in command body.") - } + }.into() } AdminCommand::GetPdu { event_id } => { let mut outlier = false; @@ -738,7 +749,7 @@ impl Service { ) } None => RoomMessageEventContent::text_plain("PDU not found."), - } + }.into() } AdminCommand::MemoryUsage => { let response1 = services().memory_usage().await; @@ -746,21 +757,21 @@ impl Service { RoomMessageEventContent::text_plain(format!( "Services:\n{response1}\n\nDatabase:\n{response2}" - )) + )).into() } AdminCommand::ClearDatabaseCaches { amount } => { services().globals.db.clear_caches(amount); - RoomMessageEventContent::text_plain("Done.") + RoomMessageEventContent::text_plain("Done.").into() } AdminCommand::ClearServiceCaches { amount } => { services().clear_caches(amount).await; - RoomMessageEventContent::text_plain("Done.") + RoomMessageEventContent::text_plain("Done.").into() } AdminCommand::ShowConfig => { // Construct and send the response - RoomMessageEventContent::text_plain(format!("{}", services().globals.config)) + RoomMessageEventContent::text_plain(format!("{}", services().globals.config)).into() } AdminCommand::ResetPassword { username } => { let user_id = match UserId::parse_with_server_name( @@ -771,7 +782,7 @@ impl 
Service { Err(e) => { return Ok(RoomMessageEventContent::text_plain(format!( "The supplied username is not a valid username: {e}" - ))) + )).into()) } }; @@ -779,7 +790,7 @@ impl Service { if user_id.server_name() != services().globals.server_name() { return Ok(RoomMessageEventContent::text_plain( "The specified user is not from this server!", - )); + ).into()); }; // Check if the specified user is valid @@ -793,7 +804,7 @@ impl Service { { return Ok(RoomMessageEventContent::text_plain( "The specified user does not exist!", - )); + ).into()); } let new_password = utils::random_string(AUTO_GEN_PASSWORD_LENGTH); @@ -808,7 +819,7 @@ impl Service { Err(e) => RoomMessageEventContent::text_plain(format!( "Couldn't reset the password for user {user_id}: {e}" )), - } + }.into() } AdminCommand::CreateUser { username, password } => { let password = @@ -822,7 +833,7 @@ impl Service { Err(e) => { return Ok(RoomMessageEventContent::text_plain(format!( "The supplied username is not a valid username: {e}" - ))) + )).into()) } }; @@ -830,18 +841,18 @@ impl Service { if user_id.server_name() != services().globals.server_name() { return Ok(RoomMessageEventContent::text_plain( "The specified user is not from this server!", - )); + ).into()); }; if user_id.is_historical() { return Ok(RoomMessageEventContent::text_plain(format!( "Userid {user_id} is not allowed due to historical" - ))); + )).into()); } if services().users.exists(&user_id)? { return Ok(RoomMessageEventContent::text_plain(format!( "Userid {user_id} already exists" - ))); + )).into()); } // Create user services().users.create(&user_id, Some(password.as_str()))?; @@ -878,7 +889,7 @@ impl Service { // Inhibit login does not work for guests RoomMessageEventContent::text_plain(format!( "Created user with user_id: {user_id} and password: {password}" - )) + )).into() } AdminCommand::AllowRegistration { status } => { if let Some(status) = status { @@ -896,15 +907,15 @@ impl Service { "Registration is currently disabled" }, ) - } + }.into() } AdminCommand::DisableRoom { room_id } => { services().rooms.metadata.disable_room(&room_id, true)?; - RoomMessageEventContent::text_plain("Room disabled.") + RoomMessageEventContent::text_plain("Room disabled.").into() } AdminCommand::EnableRoom { room_id } => { services().rooms.metadata.disable_room(&room_id, false)?; - RoomMessageEventContent::text_plain("Room enabled.") + RoomMessageEventContent::text_plain("Room enabled.").into() } AdminCommand::DeactivateUser { leave_rooms, @@ -954,7 +965,7 @@ impl Service { "User {user_id} has been deactivated, but {failed_purged_media} media failed to be purged, check the logs for more details" )) } - } + }.into() } AdminCommand::DeactivateAll { leave_rooms, @@ -1027,11 +1038,11 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. 
Add --help for details.", ) - } + }.into() } AdminCommand::QueryMedia { mxc } => { let Ok((server_name, media_id)) = mxc.parts() else { - return Ok(RoomMessageEventContent::text_plain("Invalid media MXC")); + return Ok(RoomMessageEventContent::text_plain("Invalid media MXC").into()); }; let MediaQuery{ is_blocked, source_file, thumbnails } = services().media.query(server_name, media_id)?; @@ -1118,7 +1129,55 @@ impl Service { } } - RoomMessageEventContent::text_plain(message) + RoomMessageEventContent::text_plain(message).into() + } + AdminCommand::ShowMedia { mxc } => { + let Ok((server_name, media_id)) = mxc.parts() else { + return Ok(RoomMessageEventContent::text_plain("Invalid media MXC").into()); + }; + + // TODO: Bypass blocking once MSC3911 is implemented (linking media to events) + let ruma::api::client::authenticated_media::get_content::v1::Response { + file, + content_type, + content_disposition, + } = client_server::media::get_content(server_name, media_id.to_owned(), true, true).await?; + + if let Ok(image) = image::load_from_memory(&file) { + let filename = content_disposition.and_then(|cd| cd.filename); + let (width, height) = image.dimensions(); + + MessageType::Image(ImageMessageEventContent { + body: filename.clone().unwrap_or_default(), + formatted: None, + filename, + source: MediaSource::Plain(OwnedMxcUri::from(mxc.to_owned())), + info: Some(Box::new(ruma::events::room::ImageInfo { + height: Some(height.into()), + width: Some(width.into()), + mimetype: content_type, + size: size(&file)?.try_into().ok(), + thumbnail_info: None, + thumbnail_source: None, + blurhash: None, + })), + }) + } else { + let filename = content_disposition.and_then(|cd| cd.filename); + + MessageType::File(FileMessageEventContent { + body: filename.clone().unwrap_or_default(), + formatted: None, + filename, + source: MediaSource::Plain(OwnedMxcUri::from(mxc.to_owned())), + info: Some(Box::new(ruma::events::room::message::FileInfo { + mimetype: content_type, + size: size(&file)?.try_into().ok(), + thumbnail_info: None, + thumbnail_source: None, + })), + }) + } } AdminCommand::ListMedia { user_server_filter: ListMediaArgs { @@ -1183,7 +1242,7 @@ impl Service { html_message.push_str(""); - RoomMessageEventContent::text_html(markdown_message, html_message) + RoomMessageEventContent::text_html(markdown_message, html_message).into() }, AdminCommand::PurgeMedia => media_from_body(body).map_or_else( |message| message, @@ -1196,7 +1255,7 @@ impl Service { RoomMessageEventContent::text_plain(format!( "Failed to delete {failed_count} media, check logs for more details" )) - } + }.into() }, ), AdminCommand::PurgeMediaFromUsers { @@ -1232,7 +1291,7 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. Add --help for details.", ) - } + }.into() } AdminCommand::PurgeMediaFromServer { server_id: server_name, @@ -1260,7 +1319,7 @@ impl Service { RoomMessageEventContent::text_plain(format!( "Failed to purge {failed_count} media, check logs for more details" )) - } + }.into() } AdminCommand::BlockMedia { and_purge, reason } => media_from_body(body).map_or_else( |message| message, @@ -1283,7 +1342,7 @@ impl Service { (false, false) => RoomMessageEventContent::text_plain(format!( "Failed to block {failed_count}, and purge {failed_purge_count} media, check logs for more details" )) - } + }.into() }, ), AdminCommand::BlockMediaFromUsers { from_last, reason } => { @@ -1321,7 +1380,7 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. 
Add --help for details.", ) - } + }.into() } AdminCommand::ListBlockedMedia => { let mut markdown_message = String::from( @@ -1361,7 +1420,7 @@ impl Service { html_message.push_str(""); - RoomMessageEventContent::text_html(markdown_message, html_message) + RoomMessageEventContent::text_html(markdown_message, html_message).into() } AdminCommand::UnblockMedia => media_from_body(body).map_or_else( |message| message, @@ -1374,7 +1433,7 @@ impl Service { RoomMessageEventContent::text_plain(format!( "Failed to unblock {failed_count} media, check logs for more details" )) - } + }.into() }, ), AdminCommand::SignJson => { @@ -1399,7 +1458,7 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. Add --help for details.", ) - } + }.into() } AdminCommand::VerifyJson => { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" @@ -1460,7 +1519,7 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. Add --help for details.", ) - } + }.into() } AdminCommand::HashAndSignEvent { room_version_id } => { if body.len() > 2 @@ -1490,7 +1549,7 @@ impl Service { RoomMessageEventContent::text_plain( "Expected code block in command body. Add --help for details.", ) - } + }.into() } AdminCommand::RemoveAlias { alias } => { if alias.server_name() != services().globals.server_name() { @@ -1515,7 +1574,7 @@ impl Service { .alias .remove_alias(&alias, services().globals.server_user())?; RoomMessageEventContent::text_plain("Alias removed successfully") - } + }.into() } }; @@ -2010,7 +2069,7 @@ impl Service { fn userids_from_body<'a>( body: &'a [&'a str], -) -> Result, RoomMessageEventContent>, Error> { +) -> Result, MessageType>, Error> { let users = body.to_owned().drain(1..body.len() - 1).collect::>(); let mut user_ids = Vec::new(); @@ -2071,15 +2130,14 @@ fn userids_from_body<'a>( return Ok(Err(RoomMessageEventContent::text_html( markdown_message, html_message, - ))); + ) + .into())); } Ok(Ok(user_ids)) } -fn media_from_body( - body: Vec<&str>, -) -> Result, RoomMessageEventContent> { +fn media_from_body(body: Vec<&str>) -> Result, MessageType> { if body.len() > 2 && body[0].trim() == "```" && body.last().unwrap().trim() == "```" { Ok(body .clone() @@ -2094,7 +2152,8 @@ fn media_from_body( } else { Err(RoomMessageEventContent::text_plain( "Expected code block in command body. 
Add --help for details.",
-        ))
+        )
+        .into())
     }
 }

From 0528eb592860656d74e09bcf4088def7500ebe10 Mon Sep 17 00:00:00 2001
From: Matthias Ahouansou
Date: Wed, 7 May 2025 13:03:55 +0100
Subject: [PATCH 14/15] docs: guide to moderating media

---
 docs/SUMMARY.md              |  2 ++
 docs/administration.md       |  3 +++
 docs/administration/media.md | 30 ++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)
 create mode 100644 docs/administration.md
 create mode 100644 docs/administration/media.md

diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index afba3cca..c5d504d2 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -9,6 +9,8 @@
 - [Debian](deploying/debian.md)
 - [Docker](deploying/docker.md)
 - [NixOS](deploying/nixos.md)
+- [Administration](administration.md)
+  - [Media](administration/media.md)
 - [TURN](turn.md)
 - [Appservices](appservices.md)
 - [FAQ](faq.md)

diff --git a/docs/administration.md b/docs/administration.md
new file mode 100644
index 00000000..604954b1
--- /dev/null
+++ b/docs/administration.md
@@ -0,0 +1,3 @@
+# Administration
+
+This chapter describes how to perform tasks you may want to do while running Conduit.

diff --git a/docs/administration/media.md b/docs/administration/media.md
new file mode 100644
index 00000000..2df88002
--- /dev/null
+++ b/docs/administration/media.md
@@ -0,0 +1,30 @@
+# Media
+
+While running Conduit, you may encounter undesirable media, either from other servers or from local users.
+
+## From other servers
+If the media originated from a different server that is not itself malicious, it should be enough
+to use the `purge-media-from-server` command to delete the media from the media backend, and then
+to contact the remote server so that they can deal with the offending user(s).
+
+If you do not need the media deleted as soon as possible, you can use retention policies to only
+store remote media for a short period of time, meaning that the media will be deleted automatically
+after a while. As new media can only be accessed over authenticated endpoints, only local users
+will be able to access the media via your server, so if you're running a single-user server, you
+don't need to worry about the media being distributed via your server.
+
+If you know the media IDs (which you can find with the `list-media` command), you can use the
+`block-media` command to prevent any of those media IDs (or other media with the same SHA256 hash)
+from being stored in the media backend in the future.
+
+If the server itself is malicious, then it should probably be [ACLed](https://spec.matrix.org/v1.14/client-server-api/#server-access-control-lists-acls-for-rooms)
+in the rooms it participates in. In the future, you'll be able to block the remote server from
+interacting with your server completely.
+
+## From local users
+If the undesirable media originates from your own server, you can purge media uploaded by the
+offending users with the `purge-media-from-users` command. If you also plan to deactivate a user,
+you can do so with the `--purge-media` flag on either the `deactivate-user` or the `deactivate-all`
+command. If they keep making new accounts, you can use the `block-media-from-users` command to
+prevent media with the same SHA256 hash from being uploaded again, as well as the
+`allow-registration` command to temporarily prevent users from creating new accounts.
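
For illustration, the remote-server workflow described in `docs/administration/media.md` above
might look roughly like the following admin-room session. This is a hypothetical sketch: the
`@conduit:your.server.name:` prefix, the example server name, the media ID, and the exact flag
spellings are assumptions, so run a command with `--help` in the admin room to confirm its actual
syntax. As the `media_from_body` helper in the previous patch shows, `block-media` reads the MXC
URIs to act on from a code block in the body of the same message:

````
@conduit:your.server.name: purge-media-from-server rogue.example.com

@conduit:your.server.name: block-media --and-purge --reason spam
```
mxc://rogue.example.com/AbCdEfGhIjKlMnOp
```
````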
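
The local-user workflow can be sketched the same way. Again, this is hypothetical (the user ID and
flag spellings are invented for illustration); per the `userids_from_body` helper in the previous
patch, `purge-media-from-users` takes the affected user IDs in a code block in the message body:

````
@conduit:your.server.name: purge-media-from-users
```
@spammer:your.server.name
```

@conduit:your.server.name: deactivate-user --purge-media @spammer:your.server.name
````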
From a552a4733954af8a76142ed74207740ef21e5ec2 Mon Sep 17 00:00:00 2001 From: Matthias Ahouansou Date: Thu, 8 May 2025 21:30:27 +0100 Subject: [PATCH 15/15] ci(nix): update attic public key --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1bef6143..81431686 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,7 +26,7 @@ before_script: # Add our own binary cache - if command -v nix > /dev/null; then echo "extra-substituters = https://attic.conduit.rs/conduit" >> /etc/nix/nix.conf; fi - - if command -v nix > /dev/null; then echo "extra-trusted-public-keys = conduit:ddcaWZiWm0l0IXZlO8FERRdWvEufwmd0Negl1P+c0Ns=" >> /etc/nix/nix.conf; fi + - if command -v nix > /dev/null; then echo "extra-trusted-public-keys = conduit:zXpsVmtm+MBbweaCaG/CT4pCEDDjfFAKjgbCqfDBjLE=" >> /etc/nix/nix.conf; fi # Add alternate binary cache - if command -v nix > /dev/null && [ -n "$ATTIC_ENDPOINT" ]; then echo "extra-substituters = $ATTIC_ENDPOINT" >> /etc/nix/nix.conf; fi
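
For operators who want to trust the same binary cache outside of CI, the equivalent settings can
be appended to a local `nix.conf`. The following is a minimal sketch that simply mirrors the two
CI lines above (it assumes root access and a writable `/etc/nix/nix.conf`):

```sh
# Trust the Conduit binary cache locally; values copied from .gitlab-ci.yml above
echo "extra-substituters = https://attic.conduit.rs/conduit" | sudo tee -a /etc/nix/nix.conf
echo "extra-trusted-public-keys = conduit:zXpsVmtm+MBbweaCaG/CT4pCEDDjfFAKjgbCqfDBjLE=" | sudo tee -a /etc/nix/nix.conf
```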