Reduce resource usage of metadata update task

This task previously read the EXIF data of every image file every 60s,
which led to very high CPU usage for seaf_fuse.

The obvious fix of checking modification times and only reading the
changed files does not work with seaf_fuse, because stat is not cheaper
than actually opening and reading the file.

So instead, we now only read the file list and the modification time of
the image directory itself and update the image metadata only if there
are changes. We also update the metadata at least once every hour to
detect modified files on normal filesystems.

Interestingly, seaf_fuse will update the mtime of a directory if any file
in that directory is changed, so the logic implemented in this commit
will actually detect image changes with seaf_fuse instantly.
This commit is contained in:
Klemens Schölhorn 2024-01-21 17:42:22 +01:00
parent 68c7fd2ac4
commit 48a3a206a3
2 changed files with 163 additions and 74 deletions

125
Cargo.lock generated
View File

@ -27,6 +27,12 @@ dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
@ -378,18 +384,17 @@ dependencies = [
[[package]]
name = "chrono"
version = "0.4.24"
version = "0.4.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b"
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"serde",
"time 0.1.45",
"wasm-bindgen",
"winapi",
"windows-targets 0.48.5",
]
[[package]]
@ -437,7 +442,7 @@ dependencies = [
"rand",
"sha2 0.10.6",
"subtle",
"time 0.3.20",
"time",
"version_check",
]
@ -679,7 +684,7 @@ checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
"cfg-if 1.0.0",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasi",
]
[[package]]
@ -1062,7 +1067,7 @@ checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasi",
"windows-sys 0.45.0",
]
@ -1528,17 +1533,6 @@ dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "time"
version = "0.3.20"
@ -1799,12 +1793,6 @@ dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
@ -1911,13 +1899,13 @@ version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
"windows_aarch64_gnullvm 0.42.1",
"windows_aarch64_msvc 0.42.1",
"windows_i686_gnu 0.42.1",
"windows_i686_msvc 0.42.1",
"windows_x86_64_gnu 0.42.1",
"windows_x86_64_gnullvm 0.42.1",
"windows_x86_64_msvc 0.42.1",
]
[[package]]
@ -1926,7 +1914,7 @@ version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
"windows-targets 0.42.1",
]
[[package]]
@ -1935,13 +1923,28 @@ version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
"windows_aarch64_gnullvm 0.42.1",
"windows_aarch64_msvc 0.42.1",
"windows_i686_gnu 0.42.1",
"windows_i686_msvc 0.42.1",
"windows_x86_64_gnu 0.42.1",
"windows_x86_64_gnullvm 0.42.1",
"windows_x86_64_msvc 0.42.1",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
@ -1950,42 +1953,84 @@ version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "zeroize"
version = "1.5.7"

View File

@ -1,4 +1,4 @@
use std::{net::SocketAddr, path::{Path, PathBuf}, ffi::{OsStr, OsString}, fs::File, io::{BufReader, Seek, Cursor, BufRead}, env::args_os, os::unix::prelude::OsStrExt, cmp::Reverse, sync::{Arc, RwLock}, collections::HashMap};
use std::{net::SocketAddr, path::{Path, PathBuf}, ffi::{OsStr, OsString}, fs::File, io::{BufReader, Seek, Cursor, BufRead}, env::args_os, os::unix::prelude::OsStrExt, cmp::Reverse, sync::{Arc, RwLock}, collections::{HashMap, HashSet}, time::{Instant, Duration}};
use anyhow::{Context, Result, anyhow};
use axum::{Router, routing::{get, post}, response::{IntoResponse, Redirect, Response}, http::{StatusCode, header}, extract::{self, State}, Form, handler::Handler};
@ -118,12 +118,16 @@ async fn main() {
async fn update_config_and_image_list_cache_job(
image_dir: ImageDir,
config: Config,
image_cache: ImageListCache,
image_metadata_cache: ImageListCache,
sessions: MemoryStore,
) {
let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(60));
let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(10));
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
let mut last_images = HashSet::new();
let mut last_image_dir_change = DateTime::<Utc>::UNIX_EPOCH;
let mut last_forced_update = Instant::now();
loop {
interval.tick().await;
@ -158,23 +162,70 @@ async fn update_config_and_image_list_cache_job(
sessions.clear_store().await.ok();
}
// Update image list
let image_dir = image_dir.clone();
let images = task::spawn_blocking(move || {
// TODO: Only update images with changed modification times
read_images(&image_dir)
}).await
.unwrap_or_else(|error| {
tracing::error!("Could not read images due to panic: {:#}", error);
Ok(Vec::new())
})
.unwrap_or_else(|error| {
// Read the modification time of the image dir
// Note that some fuse implementations (e.g. seaf_fuse) will update
// the mtime of the directory when a file inside the directory is
// modified. It might also be very expensive to stat files in such
// a case, so we only record the modification time of the directory.
let image_dir_change = match image_dir.metadata().and_then(|m| m.modified()) {
Ok(modified) => modified.into(),
Err(error) => {
tracing::error!("Could not read modification time of image dir: {:#}", error);
continue
},
};
// Read all image files from the image_dir
let images: HashSet<_> = match image_dir.read_dir() {
Ok(images) => {
// flatten ignores io error while traversing the iterator
images.flatten().filter_map(|entry| {
let path = entry.path();
if path.extension() == Some(OsStr::new("jpg")) {
Some(path)
} else {
None
}
}).collect()
},
Err(error) => {
tracing::error!("Could not read images: {:#}", error);
continue
},
};
let update_image_metadata = if image_dir_change != last_image_dir_change {
tracing::debug!("Update image list because image dir was modified");
true
} else if images != last_images {
// TODO: Maybe clear removed files from the image cache here?
tracing::debug!("Update image list because list of images changed");
true
} else if last_forced_update.elapsed() > Duration::from_secs(60*60) {
last_forced_update = Instant::now();
tracing::debug!("Update image list because one hour elapsed");
true
} else {
false
};
// Update image list
if update_image_metadata {
let images = images.clone();
let image_metadata = task::spawn_blocking(move || {
read_image_metadata(&images)
}).await.unwrap_or_else(|error| {
tracing::error!("Could not read images due to panic: {:#}", error);
Vec::new()
});
tracing::debug!("{} images in the image list cache", images.len());
*image_cache.write().unwrap() = images;
tracing::debug!("{} images in the image list cache", image_metadata.len());
*image_metadata_cache.write().unwrap() = image_metadata;
}
last_images = images;
last_image_dir_change = image_dir_change;
}
}
@ -405,28 +456,21 @@ fn fix_image_orientation(image: DynamicImage, exif: &Exif) -> DynamicImage {
}
}
fn read_images(directory: &Path) -> Result<Vec<ImageInfo>> {
fn read_image_metadata(images: &HashSet<PathBuf>) -> Vec<ImageInfo> {
let mut files = vec![];
let directory_iterator = directory
.read_dir()
.with_context(|| format!("Could not read files in directory {:?}", directory))?.flatten();
for file in directory_iterator {
let path = file.path();
if path.extension() == Some(OsStr::new("jpg")) {
let image_info = match read_image_info(&path) {
Ok(image_info) => image_info,
Err(error) => {
tracing::warn!("Skipping {:?} due to error: {:#}", path, error);
continue;
}
};
files.push(image_info);
}
for path in images {
let image_info = match read_image_info(path) {
Ok(image_info) => image_info,
Err(error) => {
tracing::warn!("Skipping {:?} due to error: {:#}", path, error);
continue;
}
};
files.push(image_info);
}
Ok(files)
files
}
fn extract_exif_string(field: &exif::Field) -> Option<String> {