Skip to content

Commit

Permalink
feat: add a way of using GHA cache locally
Browse files Browse the repository at this point in the history
This is the first implementation of uploading the local cache
as a single file to a remote cache for reuse in a future build.

Right now it is only done for GHA as that was the intended scope [1],
but one could adapt this system to other remote caches.

Because of the immutability of GHACache, this commit only adds support
for re-using the cache for the same version (as defined by the user
through the `SCCACHE_GHA_VERSION` environment variable).
A way of reusing incremental builds within a given version, or even
across versions, could be devised, but it falls outside the scope of
this particular effort and is probably not trivial.

[1] Mozilla-Actions/sccache-action#81
  • Loading branch information
Alphare committed Apr 24, 2024
1 parent c8d5ffa commit 464138d
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 14 deletions.
52 changes: 51 additions & 1 deletion .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,56 @@ jobs:
${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]"
gha-as-local:
# Integration job for the GHA "as local" cache mode: builds the project twice
# through sccache and checks that the stats report at least one cache hit
# after the second build.
runs-on: ubuntu-latest
needs: build

# Turn on the GHA backend together with its "as local" mode, and make cargo
# invoke rustc through the sccache binary.
env:
SCCACHE_GHA_ENABLED: "on"
SCCACHE_GHA_AS_LOCAL: "on"
RUSTC_WRAPPER: /home/runner/.cargo/bin/sccache

steps:
- name: Clone repository
uses: actions/checkout@v4

- name: Configure Cache Env
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
- name: Install rust
uses: ./.github/actions/rust-toolchain
with:
toolchain: "stable"

- uses: actions/download-artifact@v4
with:
name: integration-tests
path: /home/runner/.cargo/bin/
# NOTE(review): SCCACHE_PATH is not set in this job's `env`; presumably it is
# defined at workflow level -- confirm.
- name: Chmod for binary
run: chmod +x ${SCCACHE_PATH}

# First build from a clean tree.
- name: Test
run: cargo clean && cargo build

- name: Output
run: |
${SCCACHE_PATH} --show-stats
${SCCACHE_PATH} --show-stats | grep gha
- name: Test Twice for Cache Read
run: cargo clean && cargo build

- name: Output
run: |
${SCCACHE_PATH} --show-stats
${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]"
memcached-deprecated:
runs-on: ubuntu-latest
needs: build
Expand Down Expand Up @@ -576,7 +626,7 @@ jobs:
${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]"
hip:
# Probably wouldn't matter anyway since we run in a container, but staying
# Probably wouldn't matter anyway since we run in a container, but staying
# close to the version is better than not.
runs-on: ubuntu-22.04
needs: build
Expand Down
61 changes: 54 additions & 7 deletions src/cache/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,24 @@ pub fn storage_from_config(
return Ok(Arc::new(storage));
}
#[cfg(feature = "gha")]
CacheType::GHA(config::GHACacheConfig { ref version, .. }) => {
debug!("Init gha cache with version {version}");

let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
CacheType::GHA(config::GHACacheConfig {
ref version,
as_local,
..
}) => {
if *as_local {
debug!("Init gha as local cache");
let downloaded_path = pool
.block_on(GHACache::download_to_local(config, version))
.map_err(|err| anyhow!("download gha cache as local failed: {err:?}"))?;
let storage = disk_cache_from_config(config, pool, downloaded_path)?;
return Ok(storage);
} else {
debug!("Init gha cache with version {version}");
let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
}
}
#[cfg(feature = "memcached")]
CacheType::Memcached(config::MemcachedCacheConfig {
Expand Down Expand Up @@ -724,7 +736,21 @@ pub fn storage_from_config(
}
}

let (dir, size) = (&config.fallback_cache.dir, config.fallback_cache.size);
disk_cache_from_config(config, pool, None)
}

fn disk_cache_from_config(
config: &Config,
pool: &tokio::runtime::Handle,
root_override: Option<PathBuf>,
) -> Result<Arc<dyn Storage>> {
let (mut dir, size) = (
config.fallback_cache.dir.to_owned(),
config.fallback_cache.size,
);
if let Some(new_root) = root_override {
dir = dir.join(new_root);
}
let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode;
let rw_mode = config.fallback_cache.rw_mode.into();
debug!("Init disk cache with dir {:?}, size {}", dir, size);
Expand All @@ -737,6 +763,27 @@ pub fn storage_from_config(
)))
}

/// Upload the local cache to the GHA cache service when GHA is configured
/// in `as_local` mode.
///
/// This is a no-op returning `Ok(())` when the configured cache is not GHA,
/// when the GHA cache is disabled, or when `as_local` is not set. Otherwise
/// the result of `GHACache::upload_local_cache` is returned.
#[cfg(feature = "gha")]
pub async fn upload_local_cache(config: &Config) -> Result<()> {
    // Guard: only the GHA backend supports uploading the local cache.
    let gha_settings = match config.cache.as_ref() {
        Some(CacheType::GHA(settings)) => settings,
        _ => {
            debug!("Uploading the local cache is only possible when using GitHub Actions");
            return Ok(());
        }
    };

    // Guard: the backend must be switched on...
    if !gha_settings.enabled {
        debug!("GHA cache is disabled in config");
        return Ok(());
    }

    // ...and explicitly configured to behave like a local cache.
    if !gha_settings.as_local {
        debug!("GHA not configured `as_local`");
        return Ok(());
    }

    GHACache::upload_local_cache(config).await
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
142 changes: 142 additions & 0 deletions src/cache/gha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::path::PathBuf;

use opendal::layers::LoggingLayer;
use opendal::services::Ghac;
use opendal::Operator;

use crate::config::Config;
use crate::errors::*;
use crate::VERSION;

const FULL_GHA_CACHE_ROOT: &str = "sccache-full";

/// A cache that stores entries in GHA Cache Services.
pub struct GHACache;

Expand All @@ -43,4 +48,141 @@ impl GHACache {
.finish();
Ok(op)
}

/// Download a copy of the entire GHA cache from the given version
/// and return the path to the root folder on the local disk.
///
/// It is the user's responsibility to split the caches according
/// to anything relevant like architecture, OS, etc. by using the `version`.
pub async fn download_to_local(config: &Config, version: &str) -> Result<Option<PathBuf>> {
let tarball_path = local_cache_tarball_path(config);
let mut builder = Ghac::default();

// TODO somehow loop over decreasingly "fresh" versions of the cache
// like in
// https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#matching-a-cache-key
// For now the behavior is to match the same version, which would
// speed up rebuilds in the same (Git) branch.
//
// A few things to note that make this difficult, plus ideas:
// - GHA's cache is immutable (meaning you cannot modify a given path
// for a given version), so we cannot reuse a "global version"
// - GHA's cache doesn't allow for listing items in a version
// - GHA's cache is not shared across branches, except for branches
// that are directly from the default branch, which can use the
// default cache.
// - Maybe only using the default branch cache with a way of renewing
// it periodically is already a benefit.
// - This maybe could be done as a fallback if the current branch cache
// is empty, though this is unclear to me at the time of writing.
if version.is_empty() {
builder.version(&format!("sccache-v{VERSION}"));
} else {
builder.version(&format!("sccache-v{VERSION}-{version}"));
}

let op = Operator::new(builder)?
.layer(LoggingLayer::default())
.finish();

if !op.is_exist(FULL_GHA_CACHE_ROOT).await? {
info!("Remote full gha cache does not exist: nothing to do");
return Ok(None);
}
debug!("Found full gha cache");

let mut reader = op.reader(FULL_GHA_CACHE_ROOT).await?;
std::fs::create_dir_all(tarball_path.parent().expect("root path"))?;

let mut writer = tokio::fs::OpenOptions::new()
.write(true)
.create(true)
.open(&tarball_path)
.await
.context("opening the local tarball for writing")?;

if let Err(error) = tokio::io::copy(&mut reader, &mut writer).await {
match error.kind() {
std::io::ErrorKind::NotFound => {
debug!("Remote full gha cache was deleted: nothing to do");
// TOCTOU race with the above existence check and the cache
// being cleared.
return Ok(None);
}
_ => {
bail!(error)
}
}
};

let cache = local_cache_path(config);
let tarball =
std::fs::File::open(tarball_path).context("Failed to open the GHA cache tarball")?;
tar::Archive::new(tarball)
.unpack(&cache)
.context("Failed to extract the GHA cache tarball")?;

Ok(Some(cache))
}

/// Upload a tarball of the local cache
pub async fn upload_local_cache(config: &Config) -> Result<()> {
let cache = local_cache_path(config);
if !cache.exists() {
info!("Local cache does not exist: nothing to do");
return Ok(());
}
debug!("Found local gha cache at {}", cache.display());

let op = Operator::new(Ghac::default())?
.layer(LoggingLayer::default())
.finish();

// GHA cache is immutable, if the path has already been written within
// a given version, it cannot be changed again.
if op.is_exist(FULL_GHA_CACHE_ROOT).await? {
info!("Remote cache of this version already exists, cannot upload");
return Ok(());
}

let mut tar_builder = tar::Builder::new(vec![]);
tar_builder
.append_dir_all(local_cache_path(config), ".")
.context("Failed to create GHA local cache tarball")?;
let source = local_cache_tarball_path(config);
std::fs::write(&source, tar_builder.into_inner()?)
.context("Failed to write the GHA local cache tarball to disk")?;

let mut writer = op
.writer(FULL_GHA_CACHE_ROOT)
.await
.context("opening the remote tarball for writing")?;

let mut reader = tokio::fs::File::open(&source)
.await
.context("opening the local tarball for reading")?;

if let Err(error) = tokio::io::copy(&mut reader, &mut writer).await {
match error.kind() {
std::io::ErrorKind::AlreadyExists => {
debug!("Remote cache of this version raced us, cannot upload");
// TOCTOU race with the above existence check and the cache
// being uploaded by another worker.
return Ok(());
}
_ => bail!(error),
}
}
Ok(())
}
}

/// Path of the tarball used to transfer the full local cache: a file named
/// `FULL_GHA_CACHE_ROOT` with a `tar` extension, located directly inside the
/// fallback (disk) cache directory.
fn local_cache_tarball_path(config: &Config) -> PathBuf {
    let mut path = config.fallback_cache.dir.join(FULL_GHA_CACHE_ROOT);
    // `set_extension` inserts the separating dot itself; passing ".tar"
    // would yield a doubled dot ("sccache-full..tar").
    path.set_extension("tar");
    path
}

/// Root directory of the full local cache: the `FULL_GHA_CACHE_ROOT` entry
/// inside the fallback (disk) cache directory.
fn local_cache_path(config: &Config) -> PathBuf {
    let mut root = config.fallback_cache.dir.to_path_buf();
    root.push(FULL_GHA_CACHE_ROOT);
    root
}
17 changes: 15 additions & 2 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ pub struct GHACacheConfig {
/// Version for gha cache is a namespace. By setting different versions,
/// we can avoid mixed caches.
pub version: String,
/// Download the entire cache to be used like a local cache, then upload
/// it back if anything changed.
/// This is useful in CI contexts to reduce the number of requests,
/// hence avoiding rate limiting and improving overall cache speed.
pub as_local: bool,
}

/// Memcached's default value of expiration is 10800s (3 hours), which is too
Expand Down Expand Up @@ -784,24 +789,30 @@ fn config_from_env() -> Result<EnvConfig> {
});

// ======= GHA =======
let gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
let mut gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
// If SCCACHE_GHA_VERSION has been set, we don't need to check
// SCCACHE_GHA_ENABLED's value anymore.
Some(GHACacheConfig {
enabled: true,
version,
as_local: false,
})
} else if bool_from_env_var("SCCACHE_GHA_ENABLED")?.unwrap_or(false) {
// If only SCCACHE_GHA_ENABLED has been set to the true value, enable with
// default version.
Some(GHACacheConfig {
enabled: true,
version: "".to_string(),
as_local: false,
})
} else {
None
};

if let Some(gha) = &mut gha {
gha.as_local = bool_from_env_var("SCCACHE_GHA_AS_LOCAL")?.unwrap_or(false);
}

// ======= Azure =======
let azure = if let (Ok(connection_string), Ok(container)) = (
env::var("SCCACHE_AZURE_CONNECTION_STRING"),
Expand Down Expand Up @@ -1453,6 +1464,7 @@ service_account = "example_service_account"
[cache.gha]
enabled = true
version = "sccache"
as_local = false
[cache.memcached]
# Deprecated alias for `endpoint`
Expand Down Expand Up @@ -1519,7 +1531,8 @@ no_credentials = true
}),
gha: Some(GHACacheConfig {
enabled: true,
version: "sccache".to_string()
version: "sccache".to_string(),
as_local: false,
}),
redis: Some(RedisCacheConfig {
url: Some("redis://user:passwd@1.2.3.4:6379/?db=1".to_owned()),
Expand Down
Loading

0 comments on commit 464138d

Please sign in to comment.