Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
cpg314 committed Sep 3, 2023
0 parents commit c68033f
Show file tree
Hide file tree
Showing 8 changed files with 975 additions and 0 deletions.
603 changes: 603 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "mtime-rewind"
version = "0.1.0"
edition = "2021"
license = "MIT OR Apache-2.0"

[[bin]]
name = "mtime-rewind"

[dependencies]
anyhow = "1.0.75"
bincode = "1.3.3"
clap = { version = "4.4.0", features = ["derive"] }
env_logger = "0.10.0"
filetime = "0.2.22"
log = "0.4.20"
serde = { version = "1.0.188", features = ["derive"] }
sha2 = "0.10.7"
walkdir = "2.3.3"

[dev-dependencies]
tempfile = "3.8.0"
51 changes: 51 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# mtime-rewind

Rewind the `mtime` attribute of files whose modification time advanced since the last execution without a content change.

This is useful to avoid unnecessary cache invalidations in systems using `mtime` to detect changes, for example [Rust's cargo](https://doc.rust-lang.org/cargo/) (see [this issue](https://github.com/rust-lang/cargo/issues/6529)).

More generally, see the [`mtime` comparison considered harmful blog post](https://apenwarr.ca/log/20181113).

## Usage

```console
$ mtime-rewind --help
Rewind the mtime of files whose mtime advanced since the last execution without a content change

Usage: mtime-rewind [OPTIONS] <ROOT>

Arguments:
<ROOT>

Options:
--dry Do not edit mtime, only list the changes that would be made
-h, --help Print help

```

- The first execution will store hashes and modification times of files in a `.hashprint` file at the root. Hidden files and [cache directories](https://bford.info/cachedir/) are ignored.
- Subsequent executions will rewind files whose contents have not changed to their previously recorded modification time, and update the stored hashes and modification times of the files that did change.

Typically, `mtime-rewind` can be executed as the first step of a CI build.

## Example

```console
$ mtime-rewind ~/project
[INFO mtime_rewind] Computing hashes...
[INFO mtime_rewind] Computed hashes for 9 files
[INFO mtime_rewind] Writing hashes for the first time...
[INFO mtime_rewind] Wrote "/root/project/.hashprint"
[INFO mtime_rewind] Done
$ touch src/main.rs
$ mtime-rewind ~/project
[INFO mtime_rewind] Computing hashes...
[INFO mtime_rewind] Computed hashes for 9 files
[INFO mtime_rewind] Restoring modification times for unchanged files...
[INFO mtime_rewind] Loading cached state...
[INFO mtime_rewind] Loaded hashes for 9 files
[INFO mtime_rewind] Rewinding "/root/project/src/main.rs" from SystemTime { tv_sec: 1693727396, tv_nsec: 146042169 } to SystemTime { tv_sec: 1693727019, tv_nsec: 668108072 } as its contents did not change
[INFO mtime_rewind] 1 files rewinded
[INFO mtime_rewind] Saving the new state...
[INFO mtime_rewind] Wrote "/root/project/.hashprint"
[INFO mtime_rewind] Done
```
70 changes: 70 additions & 0 deletions about.hbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<html>

<head>
<style>
@media (prefers-color-scheme: dark) {
body {
background: #333;
color: white;
}
a {
color: skyblue;
}
}
.container {
font-family: sans-serif;
max-width: 800px;
margin: 0 auto;
}
.intro {
text-align: center;
}
.licenses-list {
list-style-type: none;
margin: 0;
padding: 0;
}
.license-used-by {
margin-top: -10px;
}
.license-text {
max-height: 200px;
overflow-y: scroll;
white-space: pre-wrap;
}
</style>
</head>

<body>
<main class="container">
<div class="intro">
<h1>Third Party Licenses</h1>
<p>This page lists the licenses of the projects used in mtime-rewind.</p>
</div>

<h2>Overview of licenses:</h2>
<ul class="licenses-overview">
{{#each overview}}
<li><a href="#{{id}}">{{name}}</a> ({{count}})</li>
{{/each}}
</ul>

<h2>All license text:</h2>
<ul class="licenses-list">
{{#each licenses}}
<li class="license">
<h3 id="{{id}}">{{name}}</h3>
<h4>Used by:</h4>
<ul class="license-used-by">
{{#each used_by}}
<li><a href="{{#if crate.repository}} {{crate.repository}} {{else}} https://crates.io/crates/{{crate.name}} {{/if}}">{{crate.name}} {{crate.version}}</a></li>
{{/each}}
</ul>
<pre class="license-text">{{text}}</pre>
</li>
{{/each}}
</ul>
</main>
</body>

</html>
6 changes: 6 additions & 0 deletions about.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
accepted = [
"Apache-2.0",
"MIT",
"ISC",
"Unicode-DFS-2016"
]
14 changes: 14 additions & 0 deletions release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Build the release binary for every target architecture and package it,
# together with the generated third-party license page, into
# target/<package>-<version>-<arch>.zip.
#
# Requires: tomlq, cross, cargo-about, zip.
#
# `pipefail` is not part of POSIX sh (e.g. dash rejects it), hence bash.
set -euo pipefail

VERSION=$(tomlq -r ".package.version" Cargo.toml)
PACKAGE=$(tomlq -r ".package.name" Cargo.toml)
for ARCH in x86_64-unknown-linux-gnu
do
    cross build -r --target "$ARCH"
    cargo about generate about.hbs > licenses.html
    DEST="target/$PACKAGE-$VERSION-$ARCH.zip"
    echo "$DEST"
    # The binary is named after the package, so it only needs to be listed once.
    zip -j -r "$DEST" "target/$ARCH/release/$PACKAGE" licenses.html
    # Show the archive contents as a sanity check.
    zip -sf "$DEST"
done
150 changes: 150 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};

use anyhow::Context;
use clap::Parser;
use log::*;
use serde::{Deserialize, Serialize};
use sha2::Digest;

/// Rewind the mtime of files whose mtime advanced since the last execution without a content change.
#[derive(Parser)]
struct Flags {
    // Directory to scan; positional argument shown as <ROOT> in the usage text.
    root: PathBuf,
    /// Do not edit mtime, only list the changes that would be made.
    #[clap(long)]
    dry: bool,
}
/// Recorded state of a single file: a digest of its contents and its modification time.
#[derive(Serialize, Deserialize, Debug)]
struct Entry {
/// SHA-256 digest of the file contents, as raw bytes.
hash: Vec<u8>,
/// Modification time reported by the filesystem when the entry was built.
mtime: std::time::SystemTime,
}

impl Entry {
fn from_file(filename: &Path) -> anyhow::Result<Self> {
let mut hasher = sha2::Sha256::new();
let file = std::fs::File::open(filename)?;
let mut file = std::io::BufReader::new(file);
std::io::copy(&mut file, &mut hasher)?;
let hash = hasher.finalize();

let meta = std::fs::metadata(filename)?;
Ok(Self {
hash: hash.to_vec(),
mtime: meta.modified()?,
})
}
}
/// State of a whole directory tree, either freshly computed or loaded from the hash file.
#[derive(Serialize, Deserialize)]
struct Data {
/// One entry per regular file, keyed by the path yielded while walking `root`.
data: HashMap<PathBuf, Entry>,
/// Root directory the state was computed for; checked against the requested root on load.
root: PathBuf,
}
impl Data {
fn compute(root: &Path) -> anyhow::Result<Self> {
info!("Computing hashes...");
let files = walkdir::WalkDir::new(root)
.min_depth(1)
.into_iter()
// Skip hidden entries and cache folders (e.g. cargo's target fodlers)
.filter_entry(|e| {
!e.path().join("CACHEDIR.TAG").exists()
&& !e
.path()
.file_name()
.and_then(|f| f.to_str())
.map_or(false, |f| f.starts_with('.'))
})
.filter_map(|e| e.ok())
.filter(|e| e.metadata().map_or(false, |e| e.is_file()));

// Compute current hashes
let mut data = HashMap::default();
for entry in files {
data.insert(entry.path().into(), Entry::from_file(entry.path())?);
}
info!("Computed hashes for {} files", data.len());
Ok(Self {
data,
root: root.into(),
})
}
fn hashes_file(root: &Path) -> PathBuf {
root.join(".hashprint")
}
fn load_cached(root: &Path) -> anyhow::Result<Self> {
info!("Loading cached state...");
let cached =
std::fs::read(Self::hashes_file(root)).context("Could not open hash file.")?;
let cached: Self = bincode::deserialize(&cached)?;
anyhow::ensure!(
cached.root == root,
"Mismatching roots found: {:?} vs {:?}",
cached.root,
root
);
info!("Loaded hashes for {:?} files", cached.data.len());
Ok(cached)
}
fn save(&self) -> anyhow::Result<()> {
let output = Self::hashes_file(&self.root);
std::fs::write(&output, bincode::serialize(&self)?)?;
info!("Wrote {:?}", output);
Ok(())
}
}
/// Entry point.
///
/// Hashes the tree under the requested root, then either records the state (first run) or
/// rewinds the mtime of every file whose mtime advanced while its contents stayed the same,
/// and records the resulting state. With `--dry` nothing is written: neither the mtimes nor
/// the hash file.
fn main() -> anyhow::Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    let args = Flags::parse();

    let mut live = Data::compute(&args.root)?;

    if !Data::hashes_file(&args.root).exists() {
        if args.dry {
            warn!("Dry mode, not writing hashes");
        } else {
            info!("Writing hashes for the first time...");
            live.save()?;
        }
    } else {
        info!("Restoring modification times for unchanged files...");
        let stored = Data::load_cached(&args.root)?;

        // Counted separately from `edited` so that dry runs report what they found.
        let mut rewinds = 0usize;
        let mut edited = HashMap::<PathBuf, Entry>::default();
        for (path, previous) in stored.data {
            // Files that disappeared since the last run have nothing to rewind.
            let current = match live.data.get(&path) {
                Some(current) => current,
                None => continue,
            };
            debug!("{:?}: {:?} (live) vs {:?} (stored)", path, current, previous);
            // Only files whose mtime is later than on the previous run are of interest.
            if current.mtime <= previous.mtime {
                continue;
            }
            if current.hash != previous.hash {
                // Legitimate mtime increase
                info!("{:?} was actually modified", path);
                continue;
            }
            info!(
                "Rewinding {:?} from {:?} to {:?} as its contents did not change",
                path, current.mtime, previous.mtime
            );
            rewinds += 1;
            if args.dry {
                warn!("Dry mode, not applying changes");
            } else {
                filetime::set_file_mtime(
                    &path,
                    filetime::FileTime::from_system_time(previous.mtime),
                )
                .with_context(|| format!("Could not set the modification time of {:?}", path))?;
                edited.insert(path, previous);
            }
        }

        if args.dry {
            info!("{} files would be rewinded", rewinds);
        } else {
            info!("{} files rewinded", rewinds);
            // Apply the new state before saving: rewound files keep their stored entry.
            live.data.extend(edited);
            info!("Saving the new state...");
            live.save()?;
        }
    }
    info!("Done");
    Ok(())
}
59 changes: 59 additions & 0 deletions tests/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use std::path::Path;

/// Runs the `mtime-rewind` binary built by cargo on `root` and waits for it to finish.
///
/// Returns an error if the process cannot be started or exits unsuccessfully.
fn exec(root: impl AsRef<Path>) -> anyhow::Result<()> {
    let mut command = std::process::Command::new(std::env!("CARGO_BIN_EXE_mtime-rewind"));
    command.arg(root.as_ref());
    // `status` starts the child with inherited stdio and waits for it.
    let status = command.status()?;
    anyhow::ensure!(status.success());
    Ok(())
}

/// Runs the external `touch` command on `path` and waits for it to finish.
///
/// Returns an error if the command cannot be started or exits unsuccessfully.
fn touch(path: &Path) -> anyhow::Result<()> {
anyhow::ensure!(std::process::Command::new("touch")
.arg(path)
.spawn()?
.wait()?
.success());
Ok(())
}
/// Returns the modification time currently reported by the filesystem for `path`.
fn mtime(path: &Path) -> anyhow::Result<std::time::SystemTime> {
    let metadata = std::fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(modified)
}

/// End-to-end scenario: a touched but unchanged file is rewound, a rewritten file is not.
#[test]
fn test() -> anyhow::Result<()> {
    let workspace = tempfile::tempdir()?;
    let root = workspace.path();
    let file_a = root.join("a");
    let file_b = root.join("b");

    // Initial contents and their modification times.
    std::fs::write(&file_a, "a")?;
    std::fs::write(&file_b, "b")?;
    let initial_a = mtime(&file_a)?;
    let initial_b = mtime(&file_b)?;

    // The first run only records the state, nothing may move.
    exec(&workspace)?;
    assert_eq!(initial_a, mtime(&file_a)?);
    assert_eq!(initial_b, mtime(&file_b)?);

    // Bump the mtime of a without changing it, and really change b.
    touch(&file_a)?;
    std::fs::write(&file_b, "b2")?;
    let rewritten_b = mtime(&file_b)?;

    exec(&workspace)?;
    // a should be rewinded
    assert_eq!(initial_a, mtime(&file_a)?);
    // but not b
    assert_ne!(initial_b, mtime(&file_b)?);

    // Bump the mtime of b, whose new contents were recorded by the previous run.
    touch(&file_b)?;

    exec(&workspace)?;
    // b should be rewinded
    assert_eq!(rewritten_b, mtime(&file_b)?);
    Ok(())
}

0 comments on commit c68033f

Please sign in to comment.