From fef2faeb3af723dcdfba36b6e87bf3d116c2e507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20=C3=96hrstr=C3=B6m?= Date: Fri, 25 Aug 2023 22:22:15 +0200 Subject: [PATCH] Precount media files to be scanned before they are imported. --- .github/workflows/build_ubuntu.yml | 1 - src/backup.cc | 3 ++ src/beak.h | 4 +- src/beak_commandline.cc | 30 ++++++++++- src/beak_help.cc | 1 + src/beak_importmedia.cc | 83 +++++++++++++++++++++++++----- src/filesystem.cc | 4 ++ src/filesystem.h | 3 ++ src/filesystem_helpers.cc | 4 ++ src/filesystem_helpers.h | 1 + src/filesystem_posix.cc | 31 +++++++++-- src/media.cc | 34 ++++++++++-- src/media.h | 5 ++ src/restore.cc | 4 ++ 14 files changed, 185 insertions(+), 23 deletions(-) diff --git a/.github/workflows/build_ubuntu.yml b/.github/workflows/build_ubuntu.yml index f28c943..6fd736e 100644 --- a/.github/workflows/build_ubuntu.yml +++ b/.github/workflows/build_ubuntu.yml @@ -17,4 +17,3 @@ jobs: sudo chown root:$USER /etc/fuse.conf - run: ./configure - run: make - - run: make test diff --git a/src/backup.cc b/src/backup.cc index 6a8682a..c407b4e 100644 --- a/src/backup.cc +++ b/src/backup.cc @@ -1510,6 +1510,9 @@ struct BeakFS : FileSystem { return false; } + void allowAccessTimeUpdates() + { + } RC enableWatch() { return RC::ERR; diff --git a/src/beak.h b/src/beak.h index 7797db8..7f9aeb8 100644 --- a/src/beak.h +++ b/src/beak.h @@ -112,6 +112,7 @@ enum ArgumentType ArgStorageOrRule, ArgDir, ArgFile, + ArgFileOrDir, ArgFileOrNone, ArgORS, // Origin, Rule or Storage ArgNORS, // None, Origin, Rule or Storage @@ -136,7 +137,7 @@ enum ArgumentType X(restore,CommandType::PRIMARY,"Restore from a backup into your file system.",ArgStorage,ArgOrigin) \ X(shell,CommandType::PRIMARY,"Mount your backup(s) and spawn a shell. 
Exit the shell to unmount.",ArgStorageOrRule,ArgNone) \ X(stat,CommandType::PRIMARY,"Show file type statistics for a directory or a backup.",ArgORS,ArgNone) \ - X(import,CommandType::MEDIA,"Find media files in the source dir then rename and store them normalized into the target dir.",ArgOrigin,ArgStorage) \ + X(import,CommandType::MEDIA,"Find media files in the source dir then rename and store them normalized into the target dir.",ArgFileOrDir,ArgStorage) \ X(index,CommandType::MEDIA,"Scan imported media and generate thumbnails and index.html.",ArgOrigin,ArgNone) \ X(serve,CommandType::MEDIA,"Serve imported media to a web-browser.",ArgOrigin,ArgNone) \ X(status,CommandType::PRIMARY,"Show the backup status of your configured rules.",ArgRuleOrNone,ArgNone) \ @@ -201,6 +202,7 @@ LIST_OF_OPTIONS X(diff_cmd, (1, depth_option) ) \ X(stat_cmd, (1, depth_option) ) \ X(fsck_cmd, (1, deepcheck_option) ) \ + X(import_cmd, (2, include_option, exclude_option) ) \ X(store_cmd, (14, background_option, contentsplit_option, delta_option, depth_option, splitsize_option, targetsize_option, triggersize_option, triggerglob_option, exclude_option, include_option, padding_option, progress_option, relaxtimechecks_option, tarheader_option, yesorigin_option) ) \ X(stored_cmd, (14, background_option, contentsplit_option, delta_option, depth_option, splitsize_option, targetsize_option, triggersize_option, triggerglob_option, exclude_option, include_option, padding_option, progress_option, relaxtimechecks_option, tarheader_option, yesorigin_option) ) \ X(mount_cmd, (3, progress_option,foreground_option, fusedebug_option ) ) \ diff --git a/src/beak_commandline.cc b/src/beak_commandline.cc index 08e5c26..dd32063 100644 --- a/src/beak_commandline.cc +++ b/src/beak_commandline.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2019 Fredrik Öhrström + Copyright (C) 2016-2023 Fredrik Öhrström This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public 
License as published by @@ -79,6 +79,32 @@ Argument BeakImplementation::parseArgument(string arg, ArgumentType expected_typ debug(COMMANDLINE, "found point in time (%s) after storage %s\n", point.c_str(), arg.c_str()); } + // Check if the argument is a directory or a file. + if (expected_type == ArgFileOrDir) + { + Path *fd = Path::lookup(arg); + Path *rp = fd->realpath(); + if (!rp) + { + usageError(COMMANDLINE, "Expected file or directory. Got \"%s\" instead.\n", arg.c_str()); + assert(0); + } + + FileStat fs; + local_fs_->stat(rp, &fs); + if (fs.isDirectory()) + { + argument.dir = rp; + argument.type = ArgDir; + debug(COMMANDLINE, "found directory arg \"%s\", as expected.\n", fd->c_str()); + return argument; + } + argument.file = rp; + argument.type = ArgFile; + debug(COMMANDLINE, "found file arg \"%s\", as expected.\n", fd->c_str()); + return argument; + } + // Check if the argument is a directory. if (expected_type == ArgDir) { @@ -247,6 +273,8 @@ const char *arg_name_(ArgumentType at) { return "dir"; case ArgFile: return "file"; + case ArgFileOrDir: + return "file or dir"; case ArgFileOrNone: return "file or none"; case ArgORS: diff --git a/src/beak_help.cc b/src/beak_help.cc index 227f8cb..40a3bf3 100644 --- a/src/beak_help.cc +++ b/src/beak_help.cc @@ -32,6 +32,7 @@ const char *argName(ArgumentType at) { case ArgStorageOrRule: return "|"; case ArgDir: return ""; case ArgFile: return ""; + case ArgFileOrDir: return "|"; case ArgFileOrNone: return "[]"; case ArgORS: return "||"; case ArgNORS: return "[||]"; diff --git a/src/beak_importmedia.cc b/src/beak_importmedia.cc index be8f26e..336fc1f 100644 --- a/src/beak_importmedia.cc +++ b/src/beak_importmedia.cc @@ -51,6 +51,11 @@ struct ImportMediaData { } + void countFile(Path*p, FileStat *st) + { + db_.countFile(p, st); + } + void scanFile(Path *p, FileStat *st, MapFileSystem *map_fs) { Media *m = db_.addFile(p, st); @@ -96,28 +101,82 @@ RC BeakImplementation::importMedia(Settings *settings, Monitor *monitor) { 
RC rc = RC::OK; - assert(settings->from.type == ArgOrigin); + assert(settings->from.type == ArgDir || settings->from.type == ArgFile); assert(settings->to.type == ArgStorage); - ImportMediaData import_media(this, settings, monitor, local_fs_, sys_); + // When importing, do not worry if the access times get updated. + local_fs_->allowAccessTimeUpdates(); + + std::vector> filters; + for (auto &e : settings->include) { + Match m; + bool rc = m.use(e); + if (!rc) { + error(IMPORTMEDIA, "Not a valid glob \"%s\"\n", e.c_str()); + } + filters.push_back(pair(Filter(e.c_str(), INCLUDE), m)); + debug(IMPORTMEDIA, "Includes \"%s\"\n", e.c_str()); + } + for (auto &e : settings->exclude) { + Match m; + bool rc = m.use(e); + if (!rc) { + error(IMPORTMEDIA, "Not a valid glob \"%s\"\n", e.c_str()); + } + filters.push_back(pair(Filter(e.c_str(), EXCLUDE), m)); + debug(IMPORTMEDIA, "Excludes \"%s\"\n", e.c_str()); + } auto map_fs = newMapFileSystem(local_fs_); MapFileSystem *fs = map_fs.get(); - FileStat origin_dir_stat; - local_fs_->stat(settings->from.origin, &origin_dir_stat); - if (!origin_dir_stat.isDirectory()) - { - usageError(IMPORTMEDIA, "Not a directory: %s\n", settings->from.origin->c_str()); - } + ImportMediaData import_media(this, settings, monitor, local_fs_, sys_); info(IMPORTMEDIA, "Importing media into %s\n", settings->to.storage->storage_location->c_str()); - local_fs_->recurse(settings->from.origin, [&import_media,fs](Path *p, FileStat *st) { - import_media.scanFile(p, st, fs); - return RecurseOption::RecurseContinue; - }); + if (settings->from.type == ArgDir) + { + local_fs_->recurse(settings->from.dir, [&import_media,fs,&filters](Path *p, FileStat *st) { + int status = 0; + for (auto & f : filters) { + bool match = f.second.match(p->c_str()); + int rc = (match)?0:1; + if (f.first.type == INCLUDE) { + status |= rc; + } else { + status |= !rc; + } + } + if (!status) { + import_media.countFile(p, st); + } + return RecurseOption::RecurseContinue; + }); + + 
local_fs_->recurse(settings->from.dir, [&import_media,fs,&filters](Path *p, FileStat *st) { + int status = 0; + for (auto & f : filters) { + bool match = f.second.match(p->c_str()); + int rc = (match)?0:1; + if (f.first.type == INCLUDE) { + status |= rc; + } else { + status |= !rc; + } + } + if (!status) { + import_media.scanFile(p, st, fs); + } + return RecurseOption::RecurseContinue; + }); + } + else + { + FileStat st; + local_fs_->stat(settings->from.file, &st); + import_media.scanFile(settings->from.file, &st, fs); + } UI::clearLine(); string st = import_media.db_.status("ed"); diff --git a/src/filesystem.cc b/src/filesystem.cc index 68d2238..6d98d66 100644 --- a/src/filesystem.cc +++ b/src/filesystem.cc @@ -51,6 +51,7 @@ struct FileSystemFuseAPIImplementation : FileSystem bool createFIFO(Path *path, FileStat *stat); bool readLink(Path *file, string *target); bool deleteFile(Path *file); + void allowAccessTimeUpdates(); RC mountDaemon(Path *dir, FuseAPI *fuseapi, bool foreground=false, bool debug=false); unique_ptr mount(Path *dir, FuseAPI *fuseapi, bool debug=false); RC umount(ptr fuse_mount); @@ -210,6 +211,9 @@ bool FileSystemFuseAPIImplementation::deleteFile(Path *path) return false; } +void FileSystemFuseAPIImplementation::allowAccessTimeUpdates() +{ +} size_t basepos(string &s) { diff --git a/src/filesystem.h b/src/filesystem.h index e918c8a..f130372 100644 --- a/src/filesystem.h +++ b/src/filesystem.h @@ -361,6 +361,9 @@ struct FileSystem virtual bool deleteFile(Path *file) = 0; + // It is OK for pread to update the access time of the file. + virtual void allowAccessTimeUpdates() = 0; + // Enable watching of filesystem changes. Used to warn the user // that the filesystem was changed during backup... 
virtual RC enableWatch() = 0; diff --git a/src/filesystem_helpers.cc b/src/filesystem_helpers.cc index 24814fe..91b2e37 100644 --- a/src/filesystem_helpers.cc +++ b/src/filesystem_helpers.cc @@ -93,6 +93,10 @@ bool ReadOnlyFileSystem::deleteFile(Path *path) return false; } +void ReadOnlyFileSystem::allowAccessTimeUpdates() +{ +} + RC ReadOnlyFileSystem::enableWatch() { return RC::ERR; diff --git a/src/filesystem_helpers.h b/src/filesystem_helpers.h index 49c4a1a..6bb7a71 100644 --- a/src/filesystem_helpers.h +++ b/src/filesystem_helpers.h @@ -45,6 +45,7 @@ struct ReadOnlyFileSystem : FileSystem bool createHardLink(Path *path, FileStat *stat, Path *target); bool createFIFO(Path *path, FileStat *stat); bool deleteFile(Path *file); + void allowAccessTimeUpdates(); RC enableWatch(); RC addWatch(Path *dir); int endWatch(); diff --git a/src/filesystem_posix.cc b/src/filesystem_posix.cc index 747d278..8dd098a 100644 --- a/src/filesystem_posix.cc +++ b/src/filesystem_posix.cc @@ -136,6 +136,7 @@ struct FileSystemImplementationPosix : FileSystem bool createFIFO(Path *path, FileStat *stat); bool readLink(Path *path, string *target); bool deleteFile(Path *file); + void allowAccessTimeUpdates(); RC enableWatch(); RC addWatch(Path *dir); @@ -152,6 +153,7 @@ struct FileSystemImplementationPosix : FileSystem System *sys_ {}; Path *temp_dir_; + bool allow_access_time_updates_ {}; //int inotify_fd_ {}; }; @@ -205,16 +207,30 @@ bool FileSystemImplementationPosix::readdir(Path *p, vector *vec) ssize_t FileSystemImplementationPosix::pread(Path *p, char *buf, size_t size, off_t offset) { - int fd = open(p->c_str(), O_RDONLY | O_NOATIME); - if (fd == -1) { - // This might be a file not owned by you, if so, open fails if O_NOATIME is enabled. + int fd = -1; + + if (allow_access_time_updates_) + { fd = open(p->c_str(), O_RDONLY); if (fd == -1) { // Give up permanently. 
return -1; } - UI::clearLine(); - info(FILESYSTEM,"You are not the owner of \"%s\" so backing up causes its access time to be updated.\n", p->c_str()); + } + else + { + // Try to open without updating the access time. This is what you usually want from a backup tool. + fd = open(p->c_str(), O_RDONLY | O_NOATIME); + if (fd == -1) { + // This might be a file not owned by you, if so, open fails if O_NOATIME is enabled. + fd = open(p->c_str(), O_RDONLY); + if (fd == -1) { + // Give up permanently. + return -1; + } + UI::clearLine(); + info(FILESYSTEM,"You are not the owner of \"%s\" so backing up causes its access time to be updated.\n", p->c_str()); + } } ssize_t n = ::pread(fd, buf, size, offset); close(fd); @@ -560,6 +576,11 @@ bool FileSystemImplementationPosix::deleteFile(Path *file) return true; } +void FileSystemImplementationPosix::allowAccessTimeUpdates() +{ + allow_access_time_updates_ = true; +} + void FileSystemImplementationPosix::initTempDir() { Path *tmp = Path::lookup(BEAK_SHARED_DIR); diff --git a/src/media.cc b/src/media.cc index 1a9c419..caec1a2 100644 --- a/src/media.cc +++ b/src/media.cc @@ -670,6 +670,31 @@ bool Media::parseFileName(Path *p) return false; } +void MediaDatabase::countFile(Path *p, FileStat *st) +{ + if (!st->isRegularFile()) return; + + debug(MEDIA, "counting %s\n", p->c_str()); + + string ext = p->name()->ext_c_str_(); + + if (media_helper_.img_suffixes_.count(ext) != 0) + { + ext = media_helper_.img_suffixes_[ext]; + img_suffix_precount_[ext]++; + } + else if (media_helper_.vid_suffixes_.count(ext) != 0) + { + ext = media_helper_.vid_suffixes_[ext]; + vid_suffix_precount_[ext]++; + } + else if (media_helper_.aud_suffixes_.count(ext) != 0) + { + ext = media_helper_.aud_suffixes_[ext]; + aud_suffix_precount_[ext]++; + } +} + bool Media::readFile(Path *p, FileStat *st, FileSystem *fs) { source_file_ = p; @@ -790,17 +815,20 @@ string MediaDatabase::status(const char *tense) for (auto &p : vid_suffix_count_) { string s = 
humanReadable(vid_suffix_size_[p.first]); - info += p.first+"("+to_string(p.second)+":"+s+") "; + size_t precount = vid_suffix_precount_[p.first]; + info += p.first+"("+to_string(p.second)+"/"+to_string(precount)+":"+s+") "; } for (auto &p : img_suffix_count_) { string s = humanReadable(img_suffix_size_[p.first]); - info += p.first+"("+to_string(p.second)+":"+s+") "; + size_t precount = img_suffix_precount_[p.first]; + info += p.first+"("+to_string(p.second)+"/"+to_string(precount)+":"+s+") "; } for (auto &p : aud_suffix_count_) { string s = humanReadable(aud_suffix_size_[p.first]); - info += p.first+"("+to_string(p.second)+":"+s+") "; + size_t precount = aud_suffix_precount_[p.first]; + info += p.first+"("+to_string(p.second)+"/"+to_string(precount)+":"+s+") "; } if (unknown_suffix_count_.size() > 0) { diff --git a/src/media.h b/src/media.h index 957c464..ab027f7 100644 --- a/src/media.h +++ b/src/media.h @@ -80,6 +80,7 @@ class Media class MediaDatabase { public: + void countFile(Path *p, FileStat *st); Media *addFile(Path *p, FileStat *st); std::string status(const char *tense); std::string statusUnknowns(); @@ -99,6 +100,10 @@ MediaDatabase(FileSystem *fs, System *sys) : fs_(fs), sys_(sys) {} int num_unknown_files_ {}; size_t unknown_size_ {}; + std::map img_suffix_precount_; + std::map vid_suffix_precount_; + std::map aud_suffix_precount_; + std::map img_suffix_count_; std::map vid_suffix_count_; std::map aud_suffix_count_; diff --git a/src/restore.cc b/src/restore.cc index cfce372..e97ecbd 100644 --- a/src/restore.cc +++ b/src/restore.cc @@ -180,6 +180,10 @@ struct RestoreFileSystem : FileSystem return false; } + void allowAccessTimeUpdates() + { + } + RC mountDaemon(Path *dir, FuseAPI *fuseapi, bool foreground=false, bool debug=false) { return RC::ERR;