diff --git a/lib/mu-indexer.cc b/lib/mu-indexer.cc index 12ff06a45..a658ada3d 100644 --- a/lib/mu-indexer.cc +++ b/lib/mu-indexer.cc @@ -145,6 +145,8 @@ struct Indexer::Private { std::mutex lock_, w_lock_; std::atomic completed_{}; bool was_empty_{}; + + uint64_t last_index_{}; }; bool @@ -206,12 +208,16 @@ Indexer::Private::handler(const std::string& fullpath, struct stat* statbuf, case Scanner::HandleType::File: { ++progress_.checked; + if (conf_.lazy_check && static_cast(statbuf->st_ctime) < last_index_) { + // in lazy mode, ignore the file if it has not changed + // since the last indexing op. + return false; + } - if ((size_t)statbuf->st_size > max_message_size_) { + if (static_cast(statbuf->st_size) > max_message_size_) { mu_debug("skip {} (too big: {} bytes)", fullpath, statbuf->st_size); return false; } - // if the message is not in the db yet, or not up-to-date, queue // it for updating/inserting. if (statbuf->st_ctime <= dirstamp_ && store_.contains_message(fullpath)) @@ -414,6 +420,10 @@ Indexer::Private::start(const Indexer::Config& conf, bool block) mu_debug("indexing: {}; clean-up: {}", conf_.scan ? "yes" : "no", conf_.cleanup ? "yes" : "no"); + // remember the _previous_ indexing, so in lazy mode we can skip + // those files. + last_index_ = store_.config().get(); + state_.change_to(IndexState::Scanning); /* kick off the first worker, which will spawn more if needed. 
*/ workers_.emplace_back(std::thread([this] { item_worker(); })); diff --git a/lib/mu-indexer.hh b/lib/mu-indexer.hh index 3ea1fb632..7fa24169b 100644 --- a/lib/mu-indexer.hh +++ b/lib/mu-indexer.hh @@ -54,8 +54,8 @@ public: bool ignore_noupdate{}; /**< ignore .noupdate files */ bool lazy_check{}; - /**< whether to skip directories that don't have a changed - * mtime */ + /**< whether to skip directories or message files that haven't changed since the + * previous indexing operation, based on their ctime */ }; /** diff --git a/man/mu-index.1.org b/man/mu-index.1.org index 5ddb35b77..af49eb023 100644 --- a/man/mu-index.1.org +++ b/man/mu-index.1.org @@ -41,7 +41,7 @@ If there is a file called _.noupdate_ in a directory, the contents of that directory and all of its subdirectories will be ignored. This can be useful to speed up things you have some maildirs that never change. -_.noupdate_ does not affect already-indexed message: you can still search for +_.noupdate_ does not affect already-indexed messages: you can still search for them. _.noupdate_ is ignored when you start indexing with an empty database (such as directly after *mu init*). @@ -58,7 +58,7 @@ the database for which there is no longer a corresponding file in the Maildir. If you do not want this, you can use *-n*, *--nocleanup*. When *mu index* catches one of the signals *SIGINT*, *SIGHUP* or *SIGTERM* (e.g., when -you press Ctrl-C during the indexing process), it attempts to shutdown +you press *Ctrl-C* during the indexing process), it attempts to shutdown gracefully; it tries to save and commit data, and close the database etc. If it receives another signal (e.g., when pressing Ctrl-C once more), *mu index* will terminate immediately. @@ -67,12 +67,17 @@ terminate immediately. ** --lazy-check In lazy-check mode, *mu* does not consider messages for which the time-stamp -(ctime) of the directory they reside in has not changed since the previous -indexing run. 
This is much faster than the non-lazy check, but won't update -messages that have change (rather than having been added or removed), since -merely editing a message does not update the directory time-stamp. Of course, -you can run *mu-index* occasionally without *--lazy-check*, to pick up such -messages. +(*ctime*) of the directory in which they reside has not changed since the +previous time this directory was checked. + +This is much faster than the non-lazy check, but won't update messages that have +changed (rather than having been added or removed), since merely editing a +message does not update the directory time-stamp. Of course, you can run +*mu-index* occasionally without *--lazy-check*, to pick up such messages. + +Furthermore, in lazy-check mode, files whose *ctime* is earlier than the time at +which the previous indexing operation completed are ignored. This helps for the +use-case where new messages appear in big maildirs. ** --nocleanup Disable the database cleanup that *mu* does by default after indexing. @@ -172,7 +177,7 @@ The instructions are a little different since we have a proper repeatable benchmark now. After building, #+begin_example - $ sudo sh -c 'sync && echo 3 > /proc/sys/vm/drop_caches' +$ sudo sh -c 'sync && echo 3 > /proc/sys/vm/drop_caches' % THREAD_NUM=4 build/lib/tests/bench-indexer -m perf # random seed: R02Sf5c50e4851ec51adaf301e0e054bd52b 1..1 @@ -185,7 +190,7 @@ ok 1 /bench/indexer/4-cores #+end_example Things are again a little faster, even though the index does a lot more now -(text-normalizatian, and pre-generating message-sexps). A faster machine helps, +(text-normalization, and pre-generating message-sexps). A faster machine helps, too! ** recent releases