1
0
Fork 0
mirror of https://gitlab.com/famedly/conduit.git synced 2025-08-06 17:40:59 +00:00

feat(media): deep hashed directory structure

This commit is contained in:
Matthias Ahouansou 2025-03-23 17:23:57 +00:00
parent 66a14ac802
commit 19d0ea408c
No known key found for this signature in database
5 changed files with 173 additions and 35 deletions

View file

@ -64,7 +64,7 @@ The `global` section contains the following fields:
### Media
The `media` table is used to configure how media is stored and where. Currently, there is only one available
backend, that being `filesystem`. The backend can be set using the `backend` field. Example:
```
```toml
[global.media]
backend = "filesystem" # the default backend
```
@ -73,12 +73,30 @@ backend = "filesystem" # the default backend
The filesystem backend has the following fields:
- `path`: The base directory where all the media files will be stored (defaults to
`${database_path}/media`)
- `directory_structure`: This is a table, used to configure how files are to be distributed within
the media directory. It has the following fields:
- `depth`: The number of sub-directories that should be created for files (default: `2`)
- `length`: How long the name of these sub-directories should be (default: `2`)
For example, a file may regularly have the name `98ea6e4f216f2fb4b69fff9b3a44842c38686ca685f3f55dc48c5d3fb1107be4`
(The SHA256 digest of the file's content). If `depth` and `length` were both set to `2`, this file would be stored
at `${path}/98/ea/6e4f216f2fb4b69fff9b3a44842c38686ca685f3f55dc48c5d3fb1107be4`. If you want to instead have all
media files in the base directory with no sub-directories, just set `directory_structure` to be empty, as follows:
```toml
[global.media]
backend = "filesystem"
[global.media.directory_structure]
```
##### Example:
```
```toml
[global.media]
backend = "filesystem"
path = "/srv/matrix-media"
[global.media.directory_structure]
depth = 4
length = 2
```
### TLS

View file

@ -2,6 +2,7 @@ use std::{
collections::BTreeMap,
fmt,
net::{IpAddr, Ipv4Addr},
num::NonZeroU8,
path::PathBuf,
};
@ -10,10 +11,13 @@ use serde::{de::IgnoredAny, Deserialize};
use tracing::warn;
use url::Url;
mod proxy;
use crate::Error;
mod proxy;
use self::proxy::ProxyConfig;
/// Number of characters in the hexadecimal encoding of a SHA-256 digest
const SHA256_HEX_LENGTH: u8 = 64;
#[derive(Deserialize)]
pub struct IncompleteConfig {
#[serde(default = "default_address")]
@ -218,7 +222,10 @@ impl From<IncompleteConfig> for Config {
};
let media = match media {
IncompleteMediaConfig::FileSystem { path } => MediaConfig::FileSystem {
IncompleteMediaConfig::FileSystem {
path,
directory_structure,
} => MediaConfig::FileSystem {
path: path.unwrap_or_else(|| {
// We do this as we don't know if the path has a trailing slash, or even if the
// path separator is a forward or backward slash
@ -229,6 +236,7 @@ impl From<IncompleteConfig> for Config {
.into_string()
.expect("Both inputs are valid UTF-8")
}),
directory_structure,
},
};
@ -309,21 +317,85 @@ pub struct WellKnownConfig {
pub server: OwnedServerName,
}
/// Media configuration as deserialized, before missing values (such as the media path) are
/// filled in when converting `IncompleteConfig` into `Config`
#[derive(Clone, Debug, Deserialize)]
#[derive(Deserialize)]
#[serde(tag = "backend", rename_all = "lowercase")]
pub enum IncompleteMediaConfig {
FileSystem { path: Option<String> },
FileSystem {
// Base directory for media files; `None` when it was not set in the config
path: Option<String>,
// Falls back to `DirectoryStructure::default()` when the field is absent
#[serde(default)]
directory_structure: DirectoryStructure,
},
}
impl Default for IncompleteMediaConfig {
// Defaults to the filesystem backend with no explicit path and the default directory
// structure
fn default() -> Self {
Self::FileSystem { path: None }
Self::FileSystem {
path: None,
directory_structure: DirectoryStructure::default(),
}
}
}
/// Media backend configuration with the media path resolved to a concrete value
#[derive(Debug, Clone)]
pub enum MediaConfig {
FileSystem { path: String },
FileSystem {
// Base directory under which all media files are stored
path: String,
// How files are distributed into sub-directories below `path`
directory_structure: DirectoryStructure,
},
}
/// How media files are distributed within the media directory.
///
/// Deserialized through `ShadowDirectoryStructure`, so that the values can be validated in the
/// `TryFrom` conversion before this type is constructed.
#[derive(Debug, Clone, Deserialize)]
// See https://github.com/serde-rs/serde/issues/642#issuecomment-525432907
#[serde(try_from = "ShadowDirectoryStructure", untagged)]
pub enum DirectoryStructure {
// We do this enum instead of Option<DirectoryStructure>, so that we can have the structure be
// deep by default, while still providing a way for it to be flat (by creating an empty table)
//
// e.g.:
// ```toml
// [global.media.directory_structure]
// ```
/// All files are stored directly in the media directory, with no sub-directories
Flat,
/// Files are stored below `depth` nested sub-directories, each named after the next `length`
/// characters of the file's SHA-256 hex digest; the rest of the digest is the file name
Deep { length: NonZeroU8, depth: NonZeroU8 },
}
impl Default for DirectoryStructure {
    /// Deep structure with two levels of sub-directories, each named with two characters.
    fn default() -> Self {
        // `length` and `depth` share the same default value, so build it once.
        let two = NonZeroU8::new(2).expect("2 is not 0");

        Self::Deep {
            length: two,
            depth: two,
        }
    }
}
/// Raw, unvalidated shape of `DirectoryStructure` as it appears in the config. It only exists so
/// that the values can be validated in the `TryFrom` conversion.
#[derive(Deserialize)]
#[serde(untagged)]
enum ShadowDirectoryStructure {
    // Untagged variants are tried in declaration order and the first one that deserializes wins.
    // A struct variant without fields accepts any table (unknown keys are ignored by default), so
    // `Deep` has to be tried first; otherwise a table that sets `length` and `depth` would be
    // read as `Flat`.
    Deep { length: NonZeroU8, depth: NonZeroU8 },
    // Matches the empty table, e.g. a bare `[global.media.directory_structure]`.
    // NOTE(review): a table that sets only one of `length`/`depth` presumably also ends up here;
    // consider `deny_unknown_fields` if that should be rejected instead - confirm.
    Flat {},
}
impl TryFrom<ShadowDirectoryStructure> for DirectoryStructure {
type Error = Error;
fn try_from(value: ShadowDirectoryStructure) -> Result<Self, Self::Error> {
match value {
ShadowDirectoryStructure::Flat {} => Ok(Self::Flat),
ShadowDirectoryStructure::Deep { length, depth } => {
if length
.get()
.checked_mul(depth.get())
.map(|product| product < SHA256_HEX_LENGTH)
// If an overflow occurs, it definitely isn't less than SHA256_HEX_LENGTH
.unwrap_or(false)
{
Ok(Self::Deep { length, depth })
} else {
Err(Error::bad_config("The media directory structure depth multiplied by the depth is equal to or greater than a sha256 hex hash, please reduce at least one of the two so that their product is less than 64"))
}
}
}
}
}
const DEPRECATED_KEYS: &[&str] = &[

View file

@ -47,33 +47,53 @@ static GLOBAL: Jemalloc = Jemalloc;
static SUB_TABLES: [&str; 3] = ["well_known", "tls", "media"]; // Not doing `proxy` cause setting that with env vars would be a pain
// Yeah, I know it's terrible, but since it seems the container users dont want syntax like A[B][C]="...",
// this is what we have to deal with. Also see: https://github.com/SergioBenitez/Figment/issues/12#issuecomment-801449465
// Tables nested one level inside an entry of `SUB_TABLES`: an env var key of the form
// `<table>_<sub_table>_<rest>` is rewritten to `<table>.<sub_table>.<rest>`
static SUB_SUB_TABLES: [&str; 1] = ["directory_structure"];
#[tokio::main]
async fn main() {
clap::parse();
// Initialize config
let raw_config =
Figment::new()
.merge(
Toml::file(Env::var("CONDUIT_CONFIG").expect(
let raw_config = Figment::new()
.merge(
Toml::file(
Env::var("CONDUIT_CONFIG").expect(
"The CONDUIT_CONFIG env var needs to be set. Example: /etc/conduit.toml",
))
.nested(),
),
)
.merge(Env::prefixed("CONDUIT_").global().map(|k| {
let mut key: Uncased = k.into();
.nested(),
)
.merge(Env::prefixed("CONDUIT_").global().map(|k| {
let mut key: Uncased = k.into();
for table in SUB_TABLES {
if k.starts_with(&(table.to_owned() + "_")) {
key = Uncased::from(
table.to_owned() + "." + k[table.len() + 1..k.len()].as_str(),
);
break;
'outer: for table in SUB_TABLES {
if k.starts_with(&(table.to_owned() + "_")) {
for sub_table in SUB_SUB_TABLES {
if k.starts_with(&(table.to_owned() + "_" + sub_table + "_")) {
key = Uncased::from(
table.to_owned()
+ "."
+ sub_table
+ "."
+ k[table.len() + 1 + sub_table.len() + 1..k.len()].as_str(),
);
break 'outer;
}
}
}
key
}));
key = Uncased::from(
table.to_owned() + "." + k[table.len() + 1..k.len()].as_str(),
);
break;
}
}
key
}));
let config = match raw_config.extract::<Config>() {
Ok(s) => s,

View file

@ -8,7 +8,7 @@ use ruma::{
use crate::api::server_server::DestinationResponse;
use crate::{
config::{MediaConfig, TurnConfig},
config::{DirectoryStructure, MediaConfig, TurnConfig},
services, Config, Error, Result,
};
use futures_util::FutureExt;
@ -230,7 +230,7 @@ impl Service {
// Remove this exception once other media backends are added
#[allow(irrefutable_let_patterns)]
if let MediaConfig::FileSystem { path } = &s.config.media {
if let MediaConfig::FileSystem { path, .. } = &s.config.media {
fs::create_dir_all(path)?;
}
@ -482,14 +482,32 @@ impl Service {
self.db.bump_database_version(new_version)
}
pub fn get_media_path(&self, media_directory: &str, sha256_hex: &str) -> PathBuf {
/// Builds the path of the media file identified by `sha256_hex` inside `media_directory`.
///
/// With a deep `directory_structure`, the first `depth` chunks of `length` bytes of
/// `sha256_hex` become nested sub-directories (created if they are missing) and the rest of
/// the hash is the file name. Otherwise the file is placed directly in `media_directory`.
///
/// # Errors
///
/// Returns an error if the leading sub-directories cannot be created.
pub fn get_media_path(
&self,
media_directory: &str,
directory_structure: &DirectoryStructure,
sha256_hex: &str,
) -> Result<PathBuf> {
let mut r = PathBuf::new();
r.push(media_directory);
//TODO: Directory distribution
r.push(sha256_hex);
if let DirectoryStructure::Deep { length, depth } = directory_structure {
let mut filename = sha256_hex;
for _ in 0..depth.get() {
// NOTE(review): `str::split_at` panics if the index is past the end of the string or not
// on a char boundary. This presumably relies on `sha256_hex` being a 64 character ASCII
// hex digest and on the config validation keeping `length * depth` below 64 - confirm
// against callers.
let (current_path, next) = filename.split_at(length.get().into());
filename = next;
r.push(current_path);
}
r
// Create all directories leading up to file
fs::create_dir_all(&r).inspect_err(|e| error!("Error creating leading directories for media with sha256 hash of {sha256_hex}: {e}"))?;
r.push(filename);
} else {
r.push(sha256_hex);
}
Ok(r)
}
pub fn shutdown(&self) {

View file

@ -298,8 +298,13 @@ impl Service {
/// Note: this function does NOT set the metadata related to the file
pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> {
match &services().globals.config.media {
MediaConfig::FileSystem { path } => {
let path = services().globals.get_media_path(path, sha256_hex);
MediaConfig::FileSystem {
path,
directory_structure,
} => {
let path = services()
.globals
.get_media_path(path, directory_structure, sha256_hex)?;
let mut f = File::create(path).await?;
f.write_all(file).await?;
@ -312,8 +317,13 @@ pub async fn create_file(sha256_hex: &str, file: &[u8]) -> Result<()> {
/// Fetches the file from the configured media backend
async fn get_file(sha256_hex: &str) -> Result<Vec<u8>> {
Ok(match &services().globals.config.media {
MediaConfig::FileSystem { path } => {
let path = services().globals.get_media_path(path, sha256_hex);
MediaConfig::FileSystem {
path,
directory_structure,
} => {
let path = services()
.globals
.get_media_path(path, directory_structure, sha256_hex)?;
let mut file = Vec::new();
File::open(path).await?.read_to_end(&mut file).await?;