1
0
Fork 0
mirror of https://gitlab.com/famedly/conduit.git synced 2025-06-27 16:35:59 +00:00

feat(media): use file's sha256 for on-disk name & make directory configurable

In addition, metadata about the file, such as creation time, last access, and
file size, are stored in the database
This commit is contained in:
Matthias Ahouansou 2025-03-16 17:40:55 +00:00
parent 937521fcf1
commit 70d7f77363
No known key found for this signature in database
14 changed files with 840 additions and 286 deletions

8
Cargo.lock generated
View file

@ -499,6 +499,7 @@ dependencies = [
"directories",
"figment",
"futures-util",
"hex",
"hickory-resolver",
"hmac",
"http 1.1.0",
@ -528,6 +529,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"sha-1",
"sha2",
"thiserror 1.0.61",
"thread_local",
"threadpool",
@ -1045,6 +1047,12 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hickory-proto"
version = "0.24.1"

View file

@ -85,6 +85,9 @@ image = { version = "0.25", default-features = false, features = [
"jpeg",
"png",
] }
# Used for creating media filenames
hex = "0.4"
sha2 = "0.10"
# Used to encode server public key
base64 = "0.22"
# Used when hashing the state

View file

@ -57,9 +57,29 @@ The `global` section contains the following fields:
| `turn_uris` | `array` | The TURN URIs | `[]` |
| `turn_secret` | `string` | The TURN secret | `""` |
| `turn_ttl` | `integer` | The TURN TTL in seconds | `86400` |
| `media` | `table` | See the [media configuration](#media) | See the [media configuration](#media) |
| `emergency_password` | `string` | Set a password to login as the `conduit` user in case of emergency | N/A |
| `well_known` | `table` | Used for [delegation](delegation.md) | See [delegation](delegation.md) |
### Media
The `media` table is used to configure how and where media is stored. Currently, `filesystem` is the
only available backend. The backend can be set using the `backend` field. Example:
```
[global.media]
backend = "filesystem" # the default backend
```
#### Filesystem backend
The filesystem backend has the following fields:
- `path`: The base directory where all the media files will be stored (defaults to
`${database_path}/media`)
##### Example:
```
[global.media]
backend = "filesystem"
path = "/srv/matrix-media"
```
### TLS
The `tls` table contains the following fields:

View file

@ -54,33 +54,33 @@ pub async fn get_media_config_auth_route(
pub async fn create_content_route(
body: Ruma<create_content::v3::Request>,
) -> Result<create_content::v3::Response> {
let mxc = format!(
"mxc://{}/{}",
services().globals.server_name(),
utils::random_string(MXC_LENGTH)
);
let create_content::v3::Request {
filename,
content_type,
file,
..
} = body.body;
let media_id = utils::random_string(MXC_LENGTH);
services()
.media
.create(
mxc.clone(),
Some(
ContentDisposition::new(ContentDispositionType::Inline)
.with_filename(body.filename.clone()),
),
body.content_type.as_deref(),
&body.file,
services().globals.server_name(),
&media_id,
filename.as_deref(),
content_type.as_deref(),
&file,
)
.await?;
Ok(create_content::v3::Response {
content_uri: mxc.into(),
content_uri: (format!("mxc://{}/{}", services().globals.server_name(), media_id)).into(),
blurhash: None,
})
}
pub async fn get_remote_content(
mxc: &str,
server_name: &ServerName,
media_id: String,
) -> Result<get_content::v1::Response, Error> {
@ -120,7 +120,7 @@ pub async fn get_remote_content(
server_name,
media::get_content::v3::Request {
server_name: server_name.to_owned(),
media_id,
media_id: media_id.clone(),
timeout_ms: Duration::from_secs(20),
allow_remote: false,
allow_redirect: true,
@ -140,8 +140,12 @@ pub async fn get_remote_content(
services()
.media
.create(
mxc.to_owned(),
content_response.content_disposition.clone(),
server_name,
&media_id,
content_response
.content_disposition
.as_ref()
.and_then(|cd| cd.filename.as_deref()),
content_response.content_type.as_deref(),
&content_response.file,
)
@ -186,13 +190,11 @@ async fn get_content(
media_id: String,
allow_remote: bool,
) -> Result<get_content::v1::Response, Error> {
let mxc = format!("mxc://{}/{}", server_name, media_id);
if let Ok(Some(FileMeta {
content_disposition,
content_type,
file,
})) = services().media.get(mxc.clone()).await
})) = services().media.get(server_name, &media_id).await
{
Ok(get_content::v1::Response {
file,
@ -200,8 +202,7 @@ async fn get_content(
content_disposition: Some(content_disposition),
})
} else if server_name != services().globals.server_name() && allow_remote {
let remote_content_response =
get_remote_content(&mxc, server_name, media_id.clone()).await?;
let remote_content_response = get_remote_content(server_name, media_id.clone()).await?;
Ok(get_content::v1::Response {
content_disposition: remote_content_response.content_disposition,
@ -262,11 +263,9 @@ async fn get_content_as_filename(
filename: String,
allow_remote: bool,
) -> Result<get_content_as_filename::v1::Response, Error> {
let mxc = format!("mxc://{}/{}", server_name, media_id);
if let Ok(Some(FileMeta {
file, content_type, ..
})) = services().media.get(mxc.clone()).await
})) = services().media.get(server_name, &media_id).await
{
Ok(get_content_as_filename::v1::Response {
file,
@ -277,8 +276,7 @@ async fn get_content_as_filename(
),
})
} else if server_name != services().globals.server_name() && allow_remote {
let remote_content_response =
get_remote_content(&mxc, server_name, media_id.clone()).await?;
let remote_content_response = get_remote_content(server_name, media_id.clone()).await?;
Ok(get_content_as_filename::v1::Response {
content_disposition: Some(
@ -351,8 +349,6 @@ async fn get_content_thumbnail(
animated: Option<bool>,
allow_remote: bool,
) -> Result<get_content_thumbnail::v1::Response, Error> {
let mxc = format!("mxc://{}/{}", server_name, media_id);
if let Some(FileMeta {
file,
content_type,
@ -360,7 +356,8 @@ async fn get_content_thumbnail(
}) = services()
.media
.get_thumbnail(
mxc.clone(),
server_name,
&media_id,
width
.try_into()
.map_err(|_| Error::BadRequest(ErrorKind::InvalidParam, "Width is invalid."))?,
@ -452,7 +449,12 @@ async fn get_content_thumbnail(
services()
.media
.upload_thumbnail(
mxc,
server_name,
&media_id,
thumbnail_response
.content_disposition
.as_ref()
.and_then(|cd| cd.filename.as_deref()),
thumbnail_response.content_type.as_deref(),
width.try_into().expect("all UInts are valid u32s"),
height.try_into().expect("all UInts are valid u32s"),

View file

@ -2221,17 +2221,14 @@ pub async fn create_invite_route(
pub async fn get_content_route(
body: Ruma<get_content::v1::Request>,
) -> Result<get_content::v1::Response> {
let mxc = format!(
"mxc://{}/{}",
services().globals.server_name(),
body.media_id
);
if let Some(FileMeta {
content_disposition,
content_type,
file,
}) = services().media.get(mxc.clone()).await?
}) = services()
.media
.get(services().globals.server_name(), &body.media_id)
.await?
{
Ok(get_content::v1::Response::new(
ContentMetadata::new(),
@ -2252,12 +2249,6 @@ pub async fn get_content_route(
pub async fn get_content_thumbnail_route(
body: Ruma<get_content_thumbnail::v1::Request>,
) -> Result<get_content_thumbnail::v1::Response> {
let mxc = format!(
"mxc://{}/{}",
services().globals.server_name(),
body.media_id
);
let Some(FileMeta {
file,
content_type,
@ -2265,7 +2256,8 @@ pub async fn get_content_thumbnail_route(
}) = services()
.media
.get_thumbnail(
mxc.clone(),
services().globals.server_name(),
&body.media_id,
body.width
.try_into()
.map_err(|_| Error::BadRequest(ErrorKind::InvalidParam, "Width is invalid."))?,
@ -2281,7 +2273,9 @@ pub async fn get_content_thumbnail_route(
services()
.media
.upload_thumbnail(
mxc,
services().globals.server_name(),
&body.media_id,
content_disposition.filename.as_deref(),
content_type.as_deref(),
body.width.try_into().expect("all UInts are valid u32s"),
body.height.try_into().expect("all UInts are valid u32s"),

View file

@ -2,6 +2,7 @@ use std::{
collections::BTreeMap,
fmt,
net::{IpAddr, Ipv4Addr},
path::PathBuf,
};
use ruma::{OwnedServerName, RoomVersionId};
@ -81,6 +82,9 @@ pub struct IncompleteConfig {
pub turn: Option<TurnConfig>,
#[serde(default)]
pub media: IncompleteMediaConfig,
pub emergency_password: Option<String>,
#[serde(flatten)]
@ -125,6 +129,8 @@ pub struct Config {
pub turn: Option<TurnConfig>,
pub media: MediaConfig,
pub emergency_password: Option<String>,
pub catchall: BTreeMap<String, IgnoredAny>,
@ -170,6 +176,7 @@ impl From<IncompleteConfig> for Config {
turn_secret,
turn_ttl,
turn,
media,
emergency_password,
catchall,
} = val;
@ -210,6 +217,21 @@ impl From<IncompleteConfig> for Config {
server: well_known_server,
};
let media = match media {
IncompleteMediaConfig::FileSystem { path } => MediaConfig::FileSystem {
path: path.unwrap_or_else(|| {
// We do this as we don't know if the path has a trailing slash, or even if the
// path separator is a forward or backward slash
[&database_path, "media"]
.iter()
.collect::<PathBuf>()
.into_os_string()
.into_string()
.expect("Both inputs are valid UTF-8")
}),
},
};
Config {
address,
port,
@ -243,6 +265,7 @@ impl From<IncompleteConfig> for Config {
trusted_servers,
log,
turn,
media,
emergency_password,
catchall,
}
@ -286,6 +309,23 @@ pub struct WellKnownConfig {
pub server: OwnedServerName,
}
/// Media backend configuration as it appears in the config file.
///
/// The `backend` field selects the variant (serde tag); optional fields are
/// filled in with defaults when converting to [`MediaConfig`].
#[derive(Clone, Debug, Deserialize)]
#[serde(tag = "backend", rename_all = "lowercase")]
pub enum IncompleteMediaConfig {
    // `path` is optional here; it defaults to `${database_path}/media`
    // during the `IncompleteConfig` -> `Config` conversion.
    FileSystem { path: Option<String> },
}

impl Default for IncompleteMediaConfig {
    // `#[derive(Default)]` with `#[default]` only works on unit variants,
    // so the default (filesystem backend, default path) is written by hand.
    fn default() -> Self {
        Self::FileSystem { path: None }
    }
}

/// Fully-resolved media backend configuration (all defaults applied).
#[derive(Debug, Clone)]
pub enum MediaConfig {
    FileSystem { path: String },
}
const DEPRECATED_KEYS: &[&str] = &[
"cache_capacity",
"turn_username",

View file

@ -1,71 +1,199 @@
use ruma::{api::client::error::ErrorKind, http_headers::ContentDisposition};
use ruma::{api::client::error::ErrorKind, ServerName};
use sha2::{digest::Output, Sha256};
use tracing::error;
use crate::{database::KeyValueDatabase, service, utils, Error, Result};
use crate::{
database::KeyValueDatabase,
service::{self, media::DbFileMeta},
utils, Error, Result,
};
impl service::media::Data for KeyValueDatabase {
fn create_file_metadata(
&self,
mxc: String,
sha256_digest: Output<Sha256>,
file_size: u64,
servername: &ServerName,
media_id: &str,
filename: Option<&str>,
content_type: Option<&str>,
) -> Result<()> {
let metadata = FilehashMetadata::new(file_size);
self.filehash_metadata
.insert(&sha256_digest, metadata.value())?;
let mut key = sha256_digest.to_vec();
key.extend_from_slice(servername.as_bytes());
key.push(0xff);
key.extend_from_slice(media_id.as_bytes());
self.filehash_servername_mediaid.insert(&key, &[])?;
let mut key = servername.as_bytes().to_vec();
key.push(0xff);
key.extend_from_slice(media_id.as_bytes());
let mut value = sha256_digest.to_vec();
value.extend_from_slice(filename.map(|f| f.as_bytes()).unwrap_or_default());
value.push(0xff);
value.extend_from_slice(content_type.map(|f| f.as_bytes()).unwrap_or_default());
self.servernamemediaid_metadata.insert(&key, &value)
}
fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result<DbFileMeta> {
let mut key = servername.as_bytes().to_vec();
key.push(0xff);
key.extend_from_slice(media_id.as_bytes());
let value = self
.servernamemediaid_metadata
.get(&key)?
.ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found."))?;
let metadata = parse_metadata(&value).inspect_err(|e| {
error!("Error parsing metadata for \"mxc://{servername}/{media_id}\" from servernamemediaid_metadata: {e}");
})?;
// Only assume file is available if there is metadata about the filehash itself
self.filehash_metadata
.get(&metadata.sha256_digest)?
.map(|_| metadata)
.ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found."))
}
fn create_thumbnail_metadata(
&self,
sha256_digest: Output<Sha256>,
file_size: u64,
servername: &ServerName,
media_id: &str,
width: u32,
height: u32,
content_disposition: &ContentDisposition,
filename: Option<&str>,
content_type: Option<&str>,
) -> Result<Vec<u8>> {
let mut key = mxc.as_bytes().to_vec();
) -> Result<()> {
let metadata = FilehashMetadata::new(file_size);
self.filehash_metadata
.insert(&sha256_digest, metadata.value())?;
let mut key = sha256_digest.to_vec();
key.extend_from_slice(servername.as_bytes());
key.push(0xff);
key.extend_from_slice(media_id.as_bytes());
key.push(0xff);
key.extend_from_slice(&width.to_be_bytes());
key.extend_from_slice(&height.to_be_bytes());
key.push(0xff);
key.extend_from_slice(content_disposition.to_string().as_bytes());
key.push(0xff);
key.extend_from_slice(
content_type
.as_ref()
.map(|c| c.as_bytes())
.unwrap_or_default(),
);
self.mediaid_file.insert(&key, &[])?;
self.filehash_thumbnailid.insert(&key, &[])?;
Ok(key)
let mut key = servername.as_bytes().to_vec();
key.push(0xff);
key.extend_from_slice(media_id.as_bytes());
key.push(0xff);
key.extend_from_slice(&width.to_be_bytes());
key.extend_from_slice(&height.to_be_bytes());
let mut value = sha256_digest.to_vec();
value.extend_from_slice(filename.map(|f| f.as_bytes()).unwrap_or_default());
value.push(0xff);
value.extend_from_slice(content_type.map(|f| f.as_bytes()).unwrap_or_default());
self.thumbnailid_metadata.insert(&key, &value)
}
fn search_file_metadata(
fn search_thumbnail_metadata(
&self,
mxc: String,
servername: &ServerName,
media_id: &str,
width: u32,
height: u32,
) -> Result<(ContentDisposition, Option<String>, Vec<u8>)> {
let mut prefix = mxc.as_bytes().to_vec();
prefix.push(0xff);
prefix.extend_from_slice(&width.to_be_bytes());
prefix.extend_from_slice(&height.to_be_bytes());
prefix.push(0xff);
) -> Result<DbFileMeta> {
let mut key = servername.as_bytes().to_vec();
key.push(0xff);
key.extend_from_slice(media_id.as_bytes());
key.push(0xff);
key.extend_from_slice(&width.to_be_bytes());
key.extend_from_slice(&height.to_be_bytes());
let (key, _) = self
.mediaid_file
.scan_prefix(prefix)
.next()
.ok_or(Error::BadRequest(ErrorKind::NotFound, "Media not found"))?;
let value = self
.thumbnailid_metadata
.get(&key)?
.ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found."))?;
let mut parts = key.rsplit(|&b| b == 0xff);
let metadata = parse_metadata(&value).inspect_err(|e| {
error!("Error parsing metadata for thumbnail \"mxc://{servername}/{media_id}\" with dimensions {width}x{height} from thumbnailid_metadata: {e}");
})?;
let content_type = parts
.next()
.map(|bytes| {
utils::string_from_bytes(bytes).map_err(|_| {
Error::bad_database("Content type in mediaid_file is invalid unicode.")
})
})
.transpose()?;
let content_disposition_bytes = parts
.next()
.ok_or_else(|| Error::bad_database("Media ID in db is invalid."))?;
let content_disposition = content_disposition_bytes.try_into().unwrap_or_else(|_| {
ContentDisposition::new(ruma::http_headers::ContentDispositionType::Inline)
});
Ok((content_disposition, content_type, key))
// Only assume file is available if there is metadata about the filehash itself
self.filehash_metadata
.get(&metadata.sha256_digest)?
.map(|_| metadata)
.ok_or_else(|| Error::BadRequest(ErrorKind::NotFound, "Media not found."))
}
}
fn parse_metadata(value: &[u8]) -> Result<DbFileMeta> {
let (sha256_digest, mut parts) = value
.split_at_checked(32)
.map(|(digest, value)| (digest.to_vec(), value.split(|&b| b == 0xff)))
.ok_or_else(|| Error::BadDatabase("Invalid format for media metadata"))?;
let filename = parts
.next()
.map(|bytes| {
utils::string_from_bytes(bytes)
.map_err(|_| Error::BadDatabase("filename in media metadata is invalid unicode"))
})
.transpose()?
.and_then(|s| (!s.is_empty()).then_some(s));
let content_type = parts
.next()
.map(|bytes| {
utils::string_from_bytes(bytes).map_err(|_| {
Error::BadDatabase("content type in media metadata is invalid unicode")
})
})
.transpose()?
.and_then(|s| (!s.is_empty()).then_some(s));
let unauthenticated_access_permitted = parts.next().is_some_and(|v| v.is_empty());
Ok(DbFileMeta {
sha256_digest,
filename,
content_type,
unauthenticated_access_permitted,
})
}
/// Packed value stored in the `filehash_metadata` tree: three big-endian
/// u64s — file size, creation time, and last access time (unix seconds).
pub struct FilehashMetadata {
    value: Vec<u8>,
}

impl FilehashMetadata {
    /// Builds metadata with explicitly supplied creation/last-access times.
    pub fn new_with_times(size: u64, creation: u64, last_access: u64) -> Self {
        let mut value = Vec::with_capacity(24);
        for field in [size, creation, last_access] {
            value.extend_from_slice(&field.to_be_bytes());
        }
        Self { value }
    }

    /// Builds metadata for a file created right now: both timestamps are
    /// set to the current unix time.
    pub fn new(size: u64) -> Self {
        let now = utils::secs_since_unix_epoch();
        Self::new_with_times(size, now, now)
    }

    /// Raw bytes, ready to be written to the database.
    pub fn value(&self) -> &[u8] {
        &self.value
    }
}

View file

@ -3,7 +3,7 @@ mod account_data;
mod appservice;
mod globals;
mod key_backups;
mod media;
pub(super) mod media;
//mod pdu;
mod pusher;
mod rooms;

View file

@ -2,12 +2,13 @@ pub mod abstraction;
pub mod key_value;
use crate::{
service::rooms::timeline::PduCount, services, utils, Config, Error, PduEvent, Result, Services,
SERVICES,
service::{globals, rooms::timeline::PduCount},
services, utils, Config, Error, PduEvent, Result, Services, SERVICES,
};
use abstraction::{KeyValueDatabaseEngine, KvTree};
use base64::{engine::general_purpose, Engine};
use directories::ProjectDirs;
use key_value::media::FilehashMetadata;
use lru_cache::LruCache;
use ruma::{
@ -17,23 +18,50 @@ use ruma::{
GlobalAccountDataEvent, GlobalAccountDataEventType, StateEventType,
},
push::Ruleset,
CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedRoomId, OwnedUserId, RoomId,
UserId,
CanonicalJsonValue, EventId, OwnedDeviceId, OwnedEventId, OwnedMxcUri, OwnedRoomId,
OwnedUserId, RoomId, UserId,
};
use serde::Deserialize;
use sha2::{Digest, Sha256};
use std::{
collections::{BTreeMap, HashMap, HashSet},
fs::{self, remove_dir_all},
io::Write,
mem::size_of,
path::Path,
path::{Path, PathBuf},
sync::{Arc, Mutex, RwLock},
time::Duration,
time::{Duration, UNIX_EPOCH},
};
use tokio::time::interval;
use tokio::{io::AsyncReadExt, time::interval};
use tracing::{debug, error, info, warn};
/// This trait should only be used for migrations, and hence should never be made "pub"
trait GlobalsMigrationsExt {
/// As the name states, old version of `get_media_file`, only for usage in migrations
fn get_media_file_old_only_use_for_migrations(&self, key: &[u8]) -> PathBuf;
/// As the name states, this should only be used for migrations.
fn get_media_folder_only_use_for_migrations(&self) -> PathBuf;
}
impl GlobalsMigrationsExt for globals::Service {
fn get_media_file_old_only_use_for_migrations(&self, key: &[u8]) -> PathBuf {
let mut r = PathBuf::new();
r.push(self.config.database_path.clone());
r.push("media");
r.push(general_purpose::URL_SAFE_NO_PAD.encode(key));
r
}
fn get_media_folder_only_use_for_migrations(&self) -> PathBuf {
let mut r = PathBuf::new();
r.push(self.config.database_path.clone());
r.push("media");
r
}
}
pub struct KeyValueDatabase {
_db: Arc<dyn KeyValueDatabaseEngine>,
@ -148,7 +176,11 @@ pub struct KeyValueDatabase {
pub(super) roomusertype_roomuserdataid: Arc<dyn KvTree>, // RoomUserType = Room + User + Type
//pub media: media::Media,
pub(super) mediaid_file: Arc<dyn KvTree>, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType
pub(super) servernamemediaid_metadata: Arc<dyn KvTree>, // Servername + MediaID -> content sha256 + Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints
pub(super) filehash_servername_mediaid: Arc<dyn KvTree>, // sha256 of content + Servername + MediaID, used to delete dangling references to filehashes from servernamemediaid
pub(super) filehash_metadata: Arc<dyn KvTree>, // sha256 of content -> file size + creation time + last access time
pub(super) thumbnailid_metadata: Arc<dyn KvTree>, // ThumbnailId = Servername + MediaID + width + height -> Filename + ContentType + extra 0xff byte if media is allowed on unauthenticated endpoints
pub(super) filehash_thumbnailid: Arc<dyn KvTree>, // sha256 of content + "ThumbnailId", as defined above. Used to delete dangling references to filehashes from thumbnailIds
//pub key_backups: key_backups::KeyBackups,
pub(super) backupid_algorithm: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
pub(super) backupid_etag: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
@ -352,7 +384,11 @@ impl KeyValueDatabase {
referencedevents: builder.open_tree("referencedevents")?,
roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?,
roomusertype_roomuserdataid: builder.open_tree("roomusertype_roomuserdataid")?,
mediaid_file: builder.open_tree("mediaid_file")?,
servernamemediaid_metadata: builder.open_tree("servernamemediaid_metadata")?,
filehash_servername_mediaid: builder.open_tree("filehash_servername_mediaid")?,
filehash_metadata: builder.open_tree("filehash_metadata")?,
thumbnailid_metadata: builder.open_tree("thumbnailid_metadata")?,
filehash_thumbnailid: builder.open_tree("filehash_thumbnailid")?,
backupid_algorithm: builder.open_tree("backupid_algorithm")?,
backupid_etag: builder.open_tree("backupid_etag")?,
backupkeyid_backup: builder.open_tree("backupkeyid_backup")?,
@ -415,7 +451,7 @@ impl KeyValueDatabase {
}
// If the database has any data, perform data migrations before starting
let latest_database_version = 16;
let latest_database_version = 17;
if services().users.count()? > 0 {
// MIGRATIONS
@ -462,16 +498,19 @@ impl KeyValueDatabase {
}
if services().globals.database_version()? < 3 {
let tree = db._db.open_tree("mediaid_file")?;
// Move media to filesystem
for (key, content) in db.mediaid_file.iter() {
for (key, content) in tree.iter() {
if content.is_empty() {
continue;
}
let path = services().globals.get_media_file(&key);
let path = services()
.globals
.get_media_file_old_only_use_for_migrations(&key);
let mut file = fs::File::create(path)?;
file.write_all(&content)?;
db.mediaid_file.insert(&key, &[])?;
tree.insert(&key, &[])?;
}
services().globals.bump_database_version(3)?;
@ -933,16 +972,23 @@ impl KeyValueDatabase {
}
if services().globals.database_version()? < 16 {
let tree = db._db.open_tree("mediaid_file")?;
// Reconstruct all media using the filesystem
db.mediaid_file.clear().unwrap();
tree.clear().unwrap();
for file in fs::read_dir(services().globals.get_media_folder()).unwrap() {
for file in fs::read_dir(
services()
.globals
.get_media_folder_only_use_for_migrations(),
)
.unwrap()
{
let file = file.unwrap();
let file_name = file.file_name().into_string().unwrap();
let mediaid = general_purpose::URL_SAFE_NO_PAD.decode(&file_name).unwrap();
if let Err(e) = migrate_content_disposition_format(mediaid, db) {
if let Err(e) = migrate_content_disposition_format(mediaid, &tree) {
error!("Error migrating media file with name \"{file_name}\": {e}");
return Err(e);
}
@ -952,6 +998,55 @@ impl KeyValueDatabase {
warn!("Migration: 13 -> 16 finished");
}
if services().globals.database_version()? < 17 {
    // Fix: user-facing warning message had a typo ("patiant").
    warn!("Migrating media repository to new format. If you have a lot of media stored, this may take a while, so please be patient!");

    // The old `mediaid_file` tree is superseded by the sha256-keyed trees;
    // drop its contents before rebuilding from the files on disk.
    let tree = db._db.open_tree("mediaid_file")?;
    tree.clear().unwrap();

    // Old on-disk names are url-safe base64 of the full media key, which
    // always begins with the MXC URI, so real media files start with the
    // encoded "mxc://" prefix.
    let mxc_prefix = general_purpose::URL_SAFE_NO_PAD.encode(b"mxc://");
    for file in fs::read_dir(
        services()
            .globals
            .get_media_folder_only_use_for_migrations(),
    )
    .unwrap()
    .filter_map(Result::ok)
    .filter(|result| {
        result.file_type().unwrap().is_file()
            && result
                .file_name()
                .to_str()
                .unwrap()
                .starts_with(&mxc_prefix)
    }) {
        let file_name = file.file_name().into_string().unwrap();

        if let Err(e) = migrate_to_sha256_media(
            db,
            &file_name,
            // Carry over the filesystem's creation & access times when the
            // platform exposes them; `migrate_to_sha256_media` falls back
            // to "now" for any `None`.
            file.metadata()
                .ok()
                .and_then(|meta| meta.created().ok())
                .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
                .map(|dur| dur.as_secs()),
            file.metadata()
                .ok()
                .and_then(|meta| meta.accessed().ok())
                .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
                .map(|dur| dur.as_secs()),
        )
        .await
        {
            error!("Error migrating media file with name \"{file_name}\": {e}");
            return Err(e);
        }
    }

    services().globals.bump_database_version(17)?;

    warn!("Migration: 16 -> 17 finished");
}
assert_eq!(
services().globals.database_version().unwrap(),
latest_database_version
@ -1117,7 +1212,7 @@ impl KeyValueDatabase {
fn migrate_content_disposition_format(
mediaid: Vec<u8>,
db: &KeyValueDatabase,
tree: &Arc<dyn KvTree>,
) -> Result<(), Error> {
let mut parts = mediaid.rsplit(|&b| b == 0xff);
let mut removed_bytes = 0;
@ -1153,28 +1248,165 @@ fn migrate_content_disposition_format(
// Some file names are too long. Ignore those.
match fs::rename(
services().globals.get_media_file(&mediaid),
services().globals.get_media_file(&new_key),
services()
.globals
.get_media_file_old_only_use_for_migrations(&mediaid),
services()
.globals
.get_media_file_old_only_use_for_migrations(&new_key),
) {
Ok(_) => {
db.mediaid_file.insert(&new_key, &[])?;
tree.insert(&new_key, &[])?;
}
Err(_) => {
fs::rename(
services().globals.get_media_file(&mediaid),
services().globals.get_media_file(&shorter_key),
services()
.globals
.get_media_file_old_only_use_for_migrations(&mediaid),
services()
.globals
.get_media_file_old_only_use_for_migrations(&shorter_key),
)
.unwrap();
db.mediaid_file.insert(&shorter_key, &[])?;
tree.insert(&shorter_key, &[])?;
}
}
} else {
db.mediaid_file.insert(&mediaid, &[])?;
tree.insert(&mediaid, &[])?;
};
Ok(())
}
/// Converts one pre-v17 media file (named after the url-safe base64 encoding
/// of its old database key) to the new sha256-addressed layout: metadata is
/// written to the new database trees, the content is re-stored under the hex
/// of its sha256 digest, and the old file is removed.
///
/// `creation`/`last_accessed` are unix-second timestamps taken from the old
/// file's filesystem metadata; the current time is used for any `None`.
async fn migrate_to_sha256_media(
    db: &KeyValueDatabase,
    file_name: &str,
    creation: Option<u64>,
    last_accessed: Option<u64>,
) -> Result<()> {
    use crate::service::media::size;

    let media_info = general_purpose::URL_SAFE_NO_PAD.decode(file_name).unwrap();

    // Old key layout: MXC URI + 0xff + width(4 BE bytes) + height(4 BE bytes)
    //                 + 0xff + content disposition + 0xff + content type
    let mxc_dimension_splitter_pos = media_info
        .iter()
        .position(|&b| b == 0xff)
        .ok_or_else(|| Error::BadDatabase("Invalid format of media info from file's name"))?;

    let mxc = utils::string_from_bytes(&media_info[..mxc_dimension_splitter_pos])
        .map(OwnedMxcUri::from)
        .map_err(|_| Error::BadDatabase("MXC from file's name is invalid UTF-8."))?;
    let (server_name, media_id) = mxc
        .parts()
        .map_err(|_| Error::BadDatabase("MXC from file's name is invalid."))?;

    // Both dimensions being zero means the file is full-size media, not a
    // thumbnail (compared against `zero_zero` below).
    let width_height = media_info
        .get(mxc_dimension_splitter_pos + 1..mxc_dimension_splitter_pos + 9)
        .ok_or_else(|| Error::BadDatabase("Invalid format of media info from file's name"))?;

    // Skip the 0xff separator after the dimensions (+9), then split the rest.
    let mut parts = media_info
        .get(mxc_dimension_splitter_pos + 10..)
        .ok_or_else(|| Error::BadDatabase("Invalid format of media info from file's name"))?
        .split(|&b| b == 0xff);

    let content_disposition_bytes = parts.next().ok_or_else(|| {
        Error::BadDatabase(
            "Media ID parsed from file's name is invalid: Missing Content Disposition.",
        )
    })?;

    // An unparsable content disposition degrades to a bare `inline`.
    let content_disposition = content_disposition_bytes.try_into().unwrap_or_else(|_| {
        ruma::http_headers::ContentDisposition::new(
            ruma::http_headers::ContentDispositionType::Inline,
        )
    });

    let content_type = parts
        .next()
        .map(|bytes| {
            utils::string_from_bytes(bytes)
                .map_err(|_| Error::BadDatabase("Content type from file's name is invalid UTF-8."))
        })
        .transpose()?;

    // Read the old file fully into memory so it can be hashed.
    let mut path = services()
        .globals
        .get_media_folder_only_use_for_migrations();
    path.push(file_name);

    let mut file = Vec::new();
    tokio::fs::File::open(&path)
        .await?
        .read_to_end(&mut file)
        .await?;
    let sha256_digest = Sha256::digest(&file);

    // Eight zero bytes = width 0, height 0 = "not a thumbnail".
    let mut zero_zero = 0u32.to_be_bytes().to_vec();
    zero_zero.extend_from_slice(&0u32.to_be_bytes());

    let mut key = sha256_digest.to_vec();

    let now = utils::secs_since_unix_epoch();
    let metadata = FilehashMetadata::new_with_times(
        size(&file)?,
        creation.unwrap_or(now),
        last_accessed.unwrap_or(now),
    );

    db.filehash_metadata.insert(&key, metadata.value())?;

    // If not a thumbnail
    if width_height == zero_zero {
        // filehash_servername_mediaid key: digest + servername + 0xff + media id
        key.extend_from_slice(server_name.as_bytes());
        key.push(0xff);
        key.extend_from_slice(media_id.as_bytes());

        db.filehash_servername_mediaid.insert(&key, &[])?;

        let mut key = server_name.as_bytes().to_vec();
        key.push(0xff);
        key.extend_from_slice(media_id.as_bytes());

        // Value layout: digest + filename + 0xff + content type (+ 0xff flag)
        let mut value = sha256_digest.to_vec();
        value.extend_from_slice(content_disposition.filename.unwrap_or_default().as_bytes());
        value.push(0xff);
        value.extend_from_slice(content_type.unwrap_or_default().as_bytes());
        // To mark as available on unauthenticated endpoints
        value.push(0xff);

        db.servernamemediaid_metadata.insert(&key, &value)?;
    } else {
        // Thumbnail: the dimensions become part of both keys.
        key.extend_from_slice(server_name.as_bytes());
        key.push(0xff);
        key.extend_from_slice(media_id.as_bytes());
        key.push(0xff);
        key.extend_from_slice(width_height);

        db.filehash_thumbnailid.insert(&key, &[])?;

        let mut key = server_name.as_bytes().to_vec();
        key.push(0xff);
        key.extend_from_slice(media_id.as_bytes());
        key.push(0xff);
        key.extend_from_slice(width_height);

        let mut value = sha256_digest.to_vec();
        value.extend_from_slice(content_disposition.filename.unwrap_or_default().as_bytes());
        value.push(0xff);
        value.extend_from_slice(content_type.unwrap_or_default().as_bytes());
        // To mark as available on unauthenticated endpoints
        value.push(0xff);

        db.thumbnailid_metadata.insert(&key, &value)?;
    }

    // Store the content under hex(sha256) in the new layout, then delete the
    // old file.
    crate::service::media::create_file(&hex::encode(sha256_digest), &file).await?;
    tokio::fs::remove_file(path).await?;

    Ok(())
}
/// Sets the emergency password and push rules for the @conduit account in case emergency password is set
fn set_emergency_access() -> Result<bool> {
let conduit_user = services().globals.server_user();

View file

@ -45,7 +45,7 @@ use tikv_jemallocator::Jemalloc;
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
static SUB_TABLES: [&str; 2] = ["well_known", "tls"]; // Not doing `proxy` cause setting that with env vars would be a pain
static SUB_TABLES: [&str; 3] = ["well_known", "tls", "media"]; // Not doing `proxy` cause setting that with env vars would be a pain
#[tokio::main]
async fn main() {

View file

@ -7,7 +7,10 @@ use ruma::{
use crate::api::server_server::DestinationResponse;
use crate::{config::TurnConfig, services, Config, Error, Result};
use crate::{
config::{MediaConfig, TurnConfig},
services, Config, Error, Result,
};
use futures_util::FutureExt;
use hickory_resolver::TokioAsyncResolver;
use hyper_util::client::legacy::connect::dns::{GaiResolver, Name as HyperName};
@ -35,8 +38,6 @@ use tokio::sync::{broadcast, watch::Receiver, Mutex, RwLock, Semaphore};
use tower_service::Service as TowerService;
use tracing::{error, info};
use base64::{engine::general_purpose, Engine as _};
type WellKnownMap = HashMap<OwnedServerName, DestinationResponse>;
type TlsNameMap = HashMap<String, (Vec<IpAddr>, u16)>;
type RateLimitState = (Instant, u32); // Time if last failed try, number of failed tries
@ -227,7 +228,11 @@ impl Service {
shutdown: AtomicBool::new(false),
};
fs::create_dir_all(s.get_media_folder())?;
// Remove this exception once other media backends are added
#[allow(irrefutable_let_patterns)]
if let MediaConfig::FileSystem { path } = &s.config.media {
fs::create_dir_all(path)?;
}
if !s
.supported_room_versions()
@ -477,18 +482,13 @@ impl Service {
self.db.bump_database_version(new_version)
}
pub fn get_media_folder(&self) -> PathBuf {
pub fn get_media_path(&self, media_directory: &str, sha256_hex: &str) -> PathBuf {
let mut r = PathBuf::new();
r.push(self.config.database_path.clone());
r.push("media");
r
}
r.push(media_directory);
//TODO: Directory distribution
r.push(sha256_hex);
pub fn get_media_file(&self, key: &[u8]) -> PathBuf {
let mut r = PathBuf::new();
r.push(self.config.database_path.clone());
r.push("media");
r.push(general_purpose::URL_SAFE_NO_PAD.encode(key));
r
}

View file

@ -1,22 +1,43 @@
use ruma::http_headers::ContentDisposition;
use ruma::ServerName;
use sha2::{digest::Output, Sha256};
use crate::Result;
use super::DbFileMeta;
pub trait Data: Send + Sync {
fn create_file_metadata(
&self,
mxc: String,
width: u32,
height: u32,
content_disposition: &ContentDisposition,
sha256_digest: Output<Sha256>,
file_size: u64,
servername: &ServerName,
media_id: &str,
filename: Option<&str>,
content_type: Option<&str>,
) -> Result<Vec<u8>>;
) -> Result<()>;
/// Returns content_disposition, content_type and the metadata key.
fn search_file_metadata(
fn search_file_metadata(&self, servername: &ServerName, media_id: &str) -> Result<DbFileMeta>;
#[allow(clippy::too_many_arguments)]
fn create_thumbnail_metadata(
&self,
mxc: String,
sha256_digest: Output<Sha256>,
file_size: u64,
servername: &ServerName,
media_id: &str,
width: u32,
height: u32,
) -> Result<(ContentDisposition, Option<String>, Vec<u8>)>;
filename: Option<&str>,
content_type: Option<&str>,
) -> Result<()>;
/// Returns the sha256 hash, filename, and content_type, and whether the media should be accessible via
/// unauthenticated endpoints.
fn search_thumbnail_metadata(
&self,
servername: &ServerName,
media_id: &str,
width: u32,
height: u32,
) -> Result<DbFileMeta>;
}

View file

@ -3,16 +3,25 @@ use std::io::Cursor;
pub use data::Data;
use ruma::{
api::client::error::ErrorKind,
api::client::{error::ErrorKind, media::is_safe_inline_content_type},
http_headers::{ContentDisposition, ContentDispositionType},
ServerName,
};
use sha2::{digest::Output, Digest, Sha256};
use crate::{services, Result};
use crate::{config::MediaConfig, services, Error, Result};
use image::imageops::FilterType;
pub struct DbFileMeta {
pub sha256_digest: Vec<u8>,
pub filename: Option<String>,
pub content_type: Option<String>,
pub unauthenticated_access_permitted: bool,
}
use tokio::{
fs::File,
io::{AsyncReadExt, AsyncWriteExt, BufReader},
io::{AsyncReadExt, AsyncWriteExt},
};
pub struct FileMeta {
@ -29,69 +38,70 @@ impl Service {
/// Uploads a file.
pub async fn create(
&self,
mxc: String,
content_disposition: Option<ContentDisposition>,
servername: &ServerName,
media_id: &str,
filename: Option<&str>,
content_type: Option<&str>,
file: &[u8],
) -> Result<()> {
let content_disposition =
content_disposition.unwrap_or(ContentDisposition::new(ContentDispositionType::Inline));
let (sha256_digest, sha256_hex) = generate_digests(file);
// Width, Height = 0 if it's not a thumbnail
let key = self
.db
.create_file_metadata(mxc, 0, 0, &content_disposition, content_type)?;
self.db.create_file_metadata(
sha256_digest,
size(file)?,
servername,
media_id,
filename,
content_type,
)?;
let path = services().globals.get_media_file(&key);
let mut f = File::create(path).await?;
f.write_all(file).await?;
Ok(())
create_file(&sha256_hex, file).await
}
/// Uploads or replaces a file thumbnail.
#[allow(clippy::too_many_arguments)]
pub async fn upload_thumbnail(
&self,
mxc: String,
servername: &ServerName,
media_id: &str,
filename: Option<&str>,
content_type: Option<&str>,
width: u32,
height: u32,
file: &[u8],
) -> Result<()> {
let key = self.db.create_file_metadata(
mxc,
let (sha256_digest, sha256_hex) = generate_digests(file);
self.db.create_thumbnail_metadata(
sha256_digest,
size(file)?,
servername,
media_id,
width,
height,
&ContentDisposition::new(ContentDispositionType::Inline),
filename,
content_type,
)?;
let path = services().globals.get_media_file(&key);
let mut f = File::create(path).await?;
f.write_all(file).await?;
Ok(())
create_file(&sha256_hex, file).await
}
/// Downloads a file.
pub async fn get(&self, mxc: String) -> Result<Option<FileMeta>> {
if let Ok((content_disposition, content_type, key)) =
self.db.search_file_metadata(mxc, 0, 0)
{
let path = services().globals.get_media_file(&key);
let mut file = Vec::new();
BufReader::new(File::open(path).await?)
.read_to_end(&mut file)
.await?;
/// Fetches a local file and its metadata
pub async fn get(&self, servername: &ServerName, media_id: &str) -> Result<Option<FileMeta>> {
let DbFileMeta {
sha256_digest,
filename,
content_type,
unauthenticated_access_permitted: _,
} = self.db.search_file_metadata(servername, media_id)?;
Ok(Some(FileMeta {
content_disposition,
content_type,
file,
}))
} else {
Ok(None)
}
let file = get_file(&hex::encode(sha256_digest)).await?;
Ok(Some(FileMeta {
content_disposition: content_disposition(filename, &content_type),
content_type,
file,
}))
}
/// Returns width, height of the thumbnail and whether it should be cropped. Returns None when
@ -119,117 +129,206 @@ impl Service {
/// For width,height <= 96 the server uses another thumbnailing algorithm which crops the image afterwards.
pub async fn get_thumbnail(
&self,
mxc: String,
servername: &ServerName,
media_id: &str,
width: u32,
height: u32,
) -> Result<Option<FileMeta>> {
let (width, height, crop) = self
.thumbnail_properties(width, height)
.unwrap_or((0, 0, false)); // 0, 0 because that's the original file
if let Ok((content_disposition, content_type, key)) =
self.db.search_file_metadata(mxc.clone(), width, height)
{
// Using saved thumbnail
let path = services().globals.get_media_file(&key);
let mut file = Vec::new();
File::open(path).await?.read_to_end(&mut file).await?;
Ok(Some(FileMeta {
content_disposition,
if let Some((width, height, crop)) = self.thumbnail_properties(width, height) {
if let Ok(DbFileMeta {
sha256_digest,
filename,
content_type,
file: file.to_vec(),
}))
} else if let Ok((content_disposition, content_type, key)) =
self.db.search_file_metadata(mxc.clone(), 0, 0)
{
// Generate a thumbnail
let path = services().globals.get_media_file(&key);
let mut file = Vec::new();
File::open(path).await?.read_to_end(&mut file).await?;
if let Ok(image) = image::load_from_memory(&file) {
let original_width = image.width();
let original_height = image.height();
if width > original_width || height > original_height {
return Ok(Some(FileMeta {
content_disposition,
content_type,
file: file.to_vec(),
}));
}
let thumbnail = if crop {
image.resize_to_fill(width, height, FilterType::CatmullRom)
} else {
let (exact_width, exact_height) = {
// Copied from image::dynimage::resize_dimensions
let ratio = u64::from(original_width) * u64::from(height);
let nratio = u64::from(width) * u64::from(original_height);
let use_width = nratio <= ratio;
let intermediate = if use_width {
u64::from(original_height) * u64::from(width)
/ u64::from(original_width)
} else {
u64::from(original_width) * u64::from(height)
/ u64::from(original_height)
};
if use_width {
if intermediate <= u64::from(u32::MAX) {
(width, intermediate as u32)
} else {
(
(u64::from(width) * u64::from(u32::MAX) / intermediate) as u32,
u32::MAX,
)
}
} else if intermediate <= u64::from(u32::MAX) {
(intermediate as u32, height)
} else {
(
u32::MAX,
(u64::from(height) * u64::from(u32::MAX) / intermediate) as u32,
)
}
};
image.thumbnail_exact(exact_width, exact_height)
};
let mut thumbnail_bytes = Vec::new();
thumbnail.write_to(
&mut Cursor::new(&mut thumbnail_bytes),
image::ImageFormat::Png,
)?;
// Save thumbnail in database so we don't have to generate it again next time
let thumbnail_key = self.db.create_file_metadata(
mxc,
width,
height,
&content_disposition,
content_type.as_deref(),
)?;
let path = services().globals.get_media_file(&thumbnail_key);
let mut f = File::create(path).await?;
f.write_all(&thumbnail_bytes).await?;
unauthenticated_access_permitted: _,
}) = self
.db
.search_thumbnail_metadata(servername, media_id, width, height)
{
// Using saved thumbnail
let file = get_file(&hex::encode(sha256_digest)).await?;
Ok(Some(FileMeta {
content_disposition,
content_disposition: content_disposition(filename, &content_type),
content_type,
file: thumbnail_bytes.to_vec(),
file,
}))
} else if let Ok(DbFileMeta {
sha256_digest,
filename,
content_type,
unauthenticated_access_permitted: _,
}) = self.db.search_file_metadata(servername, media_id)
{
let content_disposition = content_disposition(filename.clone(), &content_type);
// Generate a thumbnail
let file = get_file(&hex::encode(sha256_digest)).await?;
if let Ok(image) = image::load_from_memory(&file) {
let original_width = image.width();
let original_height = image.height();
if width > original_width || height > original_height {
return Ok(Some(FileMeta {
content_disposition,
content_type,
file,
}));
}
let thumbnail = if crop {
image.resize_to_fill(width, height, FilterType::CatmullRom)
} else {
let (exact_width, exact_height) = {
// Copied from image::dynimage::resize_dimensions
let ratio = u64::from(original_width) * u64::from(height);
let nratio = u64::from(width) * u64::from(original_height);
let use_width = nratio <= ratio;
let intermediate = if use_width {
u64::from(original_height) * u64::from(width)
/ u64::from(original_width)
} else {
u64::from(original_width) * u64::from(height)
/ u64::from(original_height)
};
if use_width {
if intermediate <= u64::from(u32::MAX) {
(width, intermediate as u32)
} else {
(
(u64::from(width) * u64::from(u32::MAX) / intermediate)
as u32,
u32::MAX,
)
}
} else if intermediate <= u64::from(u32::MAX) {
(intermediate as u32, height)
} else {
(
u32::MAX,
(u64::from(height) * u64::from(u32::MAX) / intermediate) as u32,
)
}
};
image.thumbnail_exact(exact_width, exact_height)
};
let mut thumbnail_bytes = Vec::new();
thumbnail.write_to(
&mut Cursor::new(&mut thumbnail_bytes),
image::ImageFormat::Png,
)?;
// Save thumbnail in database so we don't have to generate it again next time
self.upload_thumbnail(
servername,
media_id,
filename.as_deref(),
content_type.as_deref(),
width,
height,
&thumbnail_bytes,
)
.await?;
Ok(Some(FileMeta {
content_disposition,
content_type,
file: thumbnail_bytes,
}))
} else {
// Couldn't parse file to generate thumbnail, likely not an image
Err(Error::BadRequest(
ErrorKind::Unknown,
"Unable to generate thumbnail for the requested content (likely is not an image)",
))
}
} else {
// Couldn't parse file to generate thumbnail, likely not an image
return Err(crate::Error::BadRequest(
ErrorKind::Unknown,
"Unable to generate thumbnail for the requested content (likely is not an image)",
));
Ok(None)
}
} else {
Ok(None)
// Using full-sized file
let Ok(DbFileMeta {
sha256_digest,
filename,
content_type,
unauthenticated_access_permitted: _,
}) = self.db.search_file_metadata(servername, media_id)
else {
return Ok(None);
};
let file = get_file(&hex::encode(sha256_digest)).await?;
Ok(Some(FileMeta {
content_disposition: content_disposition(filename, &content_type),
content_type,
file,
}))
}
}
}
/// Creates the media file, using the configured media backend
///
/// Note: this function does NOT set the metadata related to the file
pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> {
    match &services().globals.config.media {
        MediaConfig::FileSystem { path } => {
            // Files are stored on disk under their sha256 hex digest
            let file_path = services().globals.get_media_path(path, sha256_hex);
            File::create(file_path).await?.write_all(file).await?;
        }
    }

    Ok(())
}
/// Fetches the file from the configured media backend
async fn get_file(sha256_hex: &str) -> Result<Vec<u8>> {
    match &services().globals.config.media {
        MediaConfig::FileSystem { path } => {
            // Files are stored on disk under their sha256 hex digest
            let file_path = services().globals.get_media_path(path, sha256_hex);
            let mut contents = Vec::new();
            File::open(file_path)
                .await?
                .read_to_end(&mut contents)
                .await?;

            Ok(contents)
        }
    }
}
/// Creates a content disposition with the given `filename`, using the `content_type` to determine whether
/// the disposition should be `inline` or `attachment`
fn content_disposition(
filename: Option<String>,
content_type: &Option<String>,
) -> ContentDisposition {
ContentDisposition::new(
if content_type
.as_deref()
.is_some_and(is_safe_inline_content_type)
{
ContentDispositionType::Inline
} else {
ContentDispositionType::Attachment
},
)
.with_filename(filename)
}
/// Returns the sha256 digest of the file, in raw and lowercase-hex form respectively
fn generate_digests(file: &[u8]) -> (Output<Sha256>, String) {
    let digest = Sha256::digest(file);
    let digest_hex = hex::encode(digest);

    (digest, digest_hex)
}
/// Get's the file size, is bytes, as u64, returning an error if the file size is larger
/// than a u64 (which is far too big to be reasonably uploaded in the first place anyways)
pub fn size(file: &[u8]) -> Result<u64> {
u64::try_from(file.len())
.map_err(|_| Error::BadRequest(ErrorKind::TooLarge, "File is too large"))
}

View file

@ -18,6 +18,13 @@ pub fn millis_since_unix_epoch() -> u64 {
.as_millis() as u64
}
/// Returns the number of whole seconds elapsed since the Unix epoch
pub fn secs_since_unix_epoch() -> u64 {
    let elapsed = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("time is valid");

    elapsed.as_secs()
}
pub fn increment(old: Option<&[u8]>) -> Option<Vec<u8>> {
let number = match old.map(|bytes| bytes.try_into()) {
Some(Ok(bytes)) => {