From 48a3a206a361fc5b112d48cdb60c2dc29608b26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Klemens=20Sch=C3=B6lhorn?= Date: Sun, 21 Jan 2024 17:42:22 +0100 Subject: [PATCH] Reduce resource usage of metadata update task This task previously read all image files' EXIF data every 60s, which led to very high CPU usage for seaf_fuse. The obvious fix of checking modification times and only reading the changed files does not work with seaf_fuse, because stat is not cheaper than actually opening and reading the file. So instead, we now only read the file list and the modification time of the image directory itself and update the image metadata only if there are changes. We also update the metadata at least once every hour to detect modified files on normal filesystems. Interestingly, seaf_fuse will update the mtime of a directory if any file in that directory is changed, so the logic implemented in this commit will actually detect image changes with seaf_fuse instantly. --- Cargo.lock | 125 +++++++++++++++++++++++++++++++++++----------------- src/main.rs | 112 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 163 insertions(+), 74 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac7fbca..1934e08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -378,18 +384,17 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.24" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", - 
"num-integer", "num-traits", "serde", - "time 0.1.45", "wasm-bindgen", - "winapi", + "windows-targets 0.48.5", ] [[package]] @@ -437,7 +442,7 @@ dependencies = [ "rand", "sha2 0.10.6", "subtle", - "time 0.3.20", + "time", "version_check", ] @@ -679,7 +684,7 @@ checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if 1.0.0", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -1062,7 +1067,7 @@ checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.45.0", ] @@ -1528,17 +1533,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - [[package]] name = "time" version = "0.3.20" @@ -1799,12 +1793,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1911,13 +1899,13 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.1", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm 0.42.1", + "windows_x86_64_msvc 0.42.1", ] [[package]] @@ -1926,7 
+1914,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.1", ] [[package]] @@ -1935,13 +1923,28 @@ version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.1", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm 0.42.1", + "windows_x86_64_msvc 0.42.1", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -1950,42 +1953,84 @@ version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" 
+[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "zeroize" version = "1.5.7" diff --git a/src/main.rs b/src/main.rs index 2a16d0e..bb17184 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use std::{net::SocketAddr, path::{Path, PathBuf}, ffi::{OsStr, OsString}, fs::File, io::{BufReader, Seek, Cursor, BufRead}, env::args_os, os::unix::prelude::OsStrExt, cmp::Reverse, sync::{Arc, RwLock}, collections::HashMap}; +use std::{net::SocketAddr, path::{Path, PathBuf}, ffi::{OsStr, OsString}, fs::File, io::{BufReader, Seek, Cursor, BufRead}, env::args_os, os::unix::prelude::OsStrExt, cmp::Reverse, sync::{Arc, RwLock}, collections::{HashMap, HashSet}, time::{Instant, Duration}}; use anyhow::{Context, Result, anyhow}; use axum::{Router, routing::{get, post}, response::{IntoResponse, Redirect, Response}, http::{StatusCode, header}, extract::{self, State}, Form, handler::Handler}; @@ -118,12 +118,16 @@ async fn main() { async fn update_config_and_image_list_cache_job( image_dir: ImageDir, config: Config, - image_cache: ImageListCache, + image_metadata_cache: ImageListCache, sessions: MemoryStore, ) { - let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(60)); + let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(10)); interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + let mut last_images = HashSet::new(); + let mut last_image_dir_change = DateTime::::UNIX_EPOCH; + let mut last_forced_update = Instant::now(); + loop { interval.tick().await; @@ -158,23 +162,70 @@ async fn update_config_and_image_list_cache_job( sessions.clear_store().await.ok(); } - // Update image list - let image_dir = image_dir.clone(); - let images = task::spawn_blocking(move || { - // TODO: Only update images with changed modification times - 
read_images(&image_dir) - }).await - .unwrap_or_else(|error| { - tracing::error!("Could not read images due to panic: {:#}", error); - Ok(Vec::new()) - }) - .unwrap_or_else(|error| { + // Read the modification time of the image dir + // Note that some fuse implementations (e.g. seaf_fuse) will update + // the mtime of the directory when a file inside the directory is + // modified. It might also be very expensive to stat files in such + // a case, so we only record the modification time of the directory. + let image_dir_change = match image_dir.metadata().and_then(|m| m.modified()) { + Ok(modified) => modified.into(), + Err(error) => { + tracing::error!("Could not read modification time of image dir: {:#}", error); + continue + }, + }; + + // Read all image files from the image_dir + let images: HashSet<_> = match image_dir.read_dir() { + Ok(images) => { + // flatten ignores io error while traversing the iterator + images.flatten().filter_map(|entry| { + let path = entry.path(); + if path.extension() == Some(OsStr::new("jpg")) { + Some(path) + } else { + None + } + }).collect() + }, + Err(error) => { tracing::error!("Could not read images: {:#}", error); + continue + }, + }; + + let update_image_metadata = if image_dir_change != last_image_dir_change { + tracing::debug!("Update image list because image dir was modified"); + true + } else if images != last_images { + // TODO: Maybe clear removed files from the image cache here? 
+ tracing::debug!("Update image list because list of images changed"); + true + } else if last_forced_update.elapsed() > Duration::from_secs(60*60) { + last_forced_update = Instant::now(); + tracing::debug!("Update image list because one hour elapsed"); + true + } else { + false + }; + + // Update image list + if update_image_metadata { + let images = images.clone(); + let image_metadata = task::spawn_blocking(move || { + read_image_metadata(&images) + }).await.unwrap_or_else(|error| { + tracing::error!("Could not read images due to panic: {:#}", error); Vec::new() }); - tracing::debug!("{} images in the image list cache", images.len()); - *image_cache.write().unwrap() = images; + tracing::debug!("{} images in the image list cache", image_metadata.len()); + *image_metadata_cache.write().unwrap() = image_metadata; + } + + last_images = images; + last_image_dir_change = image_dir_change; + } } @@ -405,28 +456,21 @@ fn fix_image_orientation(image: DynamicImage, exif: &Exif) -> DynamicImage { } } -fn read_images(directory: &Path) -> Result> { +fn read_image_metadata(images: &HashSet) -> Vec { let mut files = vec![]; - let directory_iterator = directory - .read_dir() - .with_context(|| format!("Could not read files in directory {:?}", directory))?.flatten(); - - for file in directory_iterator { - let path = file.path(); - if path.extension() == Some(OsStr::new("jpg")) { - let image_info = match read_image_info(&path) { - Ok(image_info) => image_info, - Err(error) => { - tracing::warn!("Skipping {:?} due to error: {:#}", path, error); - continue; - } - }; - files.push(image_info); - } + for path in images { + let image_info = match read_image_info(path) { + Ok(image_info) => image_info, + Err(error) => { + tracing::warn!("Skipping {:?} due to error: {:#}", path, error); + continue; + } + }; + files.push(image_info); } - Ok(files) + files } fn extract_exif_string(field: &exif::Field) -> Option {