From 933cd8a0689d92a8a45e70abaf40e7685662129e Mon Sep 17 00:00:00 2001 From: Shady Khalifa Date: Mon, 24 Apr 2023 22:33:46 +0200 Subject: [PATCH 1/3] add failing test --- src/service/media/mod.rs | 84 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 93937533..f5f499e0 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -224,3 +224,87 @@ impl Service { } } } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::*; + + struct MockedKVDatabase; + + impl Data for MockedKVDatabase { + fn create_file_metadata( + &self, + mxc: String, + width: u32, + height: u32, + content_disposition: Option<&str>, + content_type: Option<&str>, + ) -> Result> { + // copied from src/database/key_value/media.rs + let mut key = mxc.as_bytes().to_vec(); + key.push(0xff); + key.extend_from_slice(&width.to_be_bytes()); + key.extend_from_slice(&height.to_be_bytes()); + key.push(0xff); + key.extend_from_slice( + content_disposition + .as_ref() + .map(|f| f.as_bytes()) + .unwrap_or_default(), + ); + key.push(0xff); + key.extend_from_slice( + content_type + .as_ref() + .map(|c| c.as_bytes()) + .unwrap_or_default(), + ); + + Ok(key) + } + + fn search_file_metadata( + &self, + mxc: String, + width: u32, + height: u32, + ) -> Result<(Option, Option, Vec)> { + todo!() + } + } + + #[tokio::test] + async fn long_file_names_works() { + static DB: MockedKVDatabase = MockedKVDatabase; + let media = Service { db: &DB }; + + let mxc = "mxc://example.com/ascERGshawAWawugaAcauga".to_owned(); + let width = 100; + let height = 100; + let content_disposition = "attachment; filename=\"this is a very long file name with spaces and special characters like äöüß and even emoji like 🦀.png\""; + let content_type = "image/png"; + let key = media + .db + .create_file_metadata( + mxc, + width, + height, + Some(content_disposition), + Some(content_type), + ) + .unwrap(); + let mut r = PathBuf::new(); + r.push("/tmp"); + r.push("media"); + r.push(base64::encode_config(key, base64::URL_SAFE_NO_PAD)); + // Check that the file path is not longer than 255 characters + // (255 is the maximum length of a file path on most file systems) + assert!( + r.to_str().unwrap().len() <= 255, + "File path is too long: {}", + r.to_str().unwrap().len() + ); + } +} From 37952c1a19f996bf258f6106e9210d3343bc946e Mon Sep 17 00:00:00 2001 From: Shady Khalifa Date: Mon, 24 Apr 2023 22:42:10 +0200 Subject: [PATCH 2/3] use sha2 of the key instead of the key directly --- Cargo.lock | 1 + Cargo.toml | 1 + src/service/media/mod.rs | 10 +++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 505c71c9..b3b1498f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -415,6 +415,7 @@ dependencies = [ "serde_json", "serde_yaml", "sha-1", + "sha2", "thiserror", "thread_local", "threadpool", diff --git a/Cargo.toml b/Cargo.toml index 36ffb13e..84310847 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,6 +88,7 @@ thread_local = "1.1.3" # used for TURN server authentication hmac = "0.12.1" sha-1 = "0.10.0" +sha2 = "0.9" # used for conduit's CLI and admin room command parsing clap = { version = "4.0.11", default-features = false, features = ["std", "derive", "help", "usage", "error-context"] } futures-util = { version = "0.3.17", default-features = false } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index f5f499e0..4f9c35b6 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -229,6 +229,8 @@ impl Service { mod tests { use std::path::PathBuf; + use sha2::Digest; + use super::*; struct MockedKVDatabase; @@ -298,7 +300,13 @@ mod tests { let mut r = PathBuf::new(); r.push("/tmp"); r.push("media"); - r.push(base64::encode_config(key, base64::URL_SAFE_NO_PAD)); + // r.push(base64::encode_config(key, base64::URL_SAFE_NO_PAD)); + // use the sha256 hash of the key as the file name instead of the key itself + // this is because the base64 encoded key can be longer than 255 characters. + r.push(base64::encode_config( + sha2::Sha256::digest(&key), + base64::URL_SAFE_NO_PAD, + )); // Check that the file path is not longer than 255 characters // (255 is the maximum length of a file path on most file systems) assert!( From ca86b1772f5996135831750d47763eebe7bcec1f Mon Sep 17 00:00:00 2001 From: Shady Khalifa Date: Mon, 24 Apr 2023 22:53:19 +0200 Subject: [PATCH 3/3] use the sha2 of the key and add database migrations --- src/database/mod.rs | 21 +++++++++++++++++++++ src/service/globals/mod.rs | 25 +++++++++++++++++++++++-- src/service/media/mod.rs | 6 +++--- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/database/mod.rs b/src/database/mod.rs index e05991d9..0eae027c 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -880,6 +880,27 @@ impl KeyValueDatabase { warn!("Migration: 11 -> 12 finished"); } + if services().globals.database_version()? < 13 { + // Move old media files to new names + for (key, _) in db.mediaid_file.iter() { + // we know that this method is deprecated, but we need to use it to migrate the old files + // to the new location + // + // TODO: remove this once we're sure that all users have migrated + #[allow(deprecated)] + let old_path = services().globals.get_media_file_old(&key); + let path = services().globals.get_media_file(&key); + // move the file to the new location + if old_path.exists() { + tokio::fs::rename(&old_path, &path).await?; + } + } + + services().globals.bump_database_version(13)?; + + warn!("Migration: 12 -> 13 finished"); + } + assert_eq!( services().globals.database_version().unwrap(), latest_database_version diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 9206d43f..769a37a9 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -3,6 +3,7 @@ pub use data::Data; use ruma::{ OwnedDeviceId, OwnedEventId, OwnedRoomId, OwnedServerName, OwnedServerSigningKeyId, OwnedUserId, }; +use sha2::Digest; use crate::api::server_server::FedDest; @@ -14,14 +15,16 @@ use ruma::{ }, DeviceId, RoomVersionId, ServerName, UserId, }; -use std::sync::atomic::{self, AtomicBool}; use std::{ collections::{BTreeMap, HashMap}, fs, future::Future, net::{IpAddr, SocketAddr}, path::PathBuf, - sync::{Arc, Mutex, RwLock}, + sync::{ + atomic::{self, AtomicBool}, + Arc, Mutex, RwLock, + }, time::{Duration, Instant}, }; use tokio::sync::{broadcast, watch::Receiver, Mutex as TokioMutex, Semaphore}; @@ -339,6 +342,24 @@ impl Service { } pub fn get_media_file(&self, key: &[u8]) -> PathBuf { + let mut r = PathBuf::new(); + r.push(self.config.database_path.clone()); + r.push("media"); + r.push(base64::encode_config( + // Using the hash of the key as the filename + // This is to prevent the total length of the path from exceeding the maximum length + sha2::Sha256::digest(key), + base64::URL_SAFE_NO_PAD, + )); + r + } + + /// This is the old version of `get_media_file` that uses the key as the filename. + /// + /// This is deprecated and will be removed in a future release. + /// Please use `get_media_file` instead. + #[deprecated(note = "Use get_media_file instead")] + pub fn get_media_file_old(&self, key: &[u8]) -> PathBuf { let mut r = PathBuf::new(); r.push(self.config.database_path.clone()); r.push("media"); diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 4f9c35b6..1b243392 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -269,9 +269,9 @@ mod tests { fn search_file_metadata( &self, - mxc: String, - width: u32, - height: u32, + _mxc: String, + _width: u32, + _height: u32, ) -> Result<(Option, Option, Vec)> { todo!() }