From 464138dd57a9c48fec55f915f837387853d2019a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Gom=C3=A8s?= Date: Tue, 2 Apr 2024 16:14:14 +0200 Subject: [PATCH] feat: add a way of using GHA cache locally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the first implementation of uploading the local cache as a single file to a remote cache for reuse in a future build. Right now it is only done for GHA as that was the intended scope¹, but one could adapt this system to other remote caches. Because of the immutability of GHACache, this commit only adds support for re-using the cache for the same version (as defined by the user through the `SCCACHE_GHA_VERSION` environment variable). A way of reusing incremental build within a given version or even across versions could be devised, but it falls outside the scope of this particular effort, and it's probably not trivial. [1] https://github.com/Mozilla-Actions/sccache-action/issues/81 --- .github/workflows/integration-tests.yml | 52 ++++++++- src/cache/cache.rs | 61 ++++++++-- src/cache/gha.rs | 142 ++++++++++++++++++++++++ src/config.rs | 17 ++- src/server.rs | 14 ++- 5 files changed, 272 insertions(+), 14 deletions(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index a3b85fce3..28b8dfacc 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -288,6 +288,56 @@ jobs: ${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]" + gha-as-local: + runs-on: ubuntu-latest + needs: build + + env: + SCCACHE_GHA_ENABLED: "on" + SCCACHE_GHA_AS_LOCAL: "on" + RUSTC_WRAPPER: /home/runner/.cargo/bin/sccache + + steps: + - name: Clone repository + uses: actions/checkout@v4 + + - name: Configure Cache Env + uses: actions/github-script@v7 + with: + script: | + core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); + core.exportVariable('ACTIONS_RUNTIME_TOKEN', 
process.env.ACTIONS_RUNTIME_TOKEN || ''); + + - name: Install rust + uses: ./.github/actions/rust-toolchain + with: + toolchain: "stable" + + - uses: actions/download-artifact@v4 + with: + name: integration-tests + path: /home/runner/.cargo/bin/ + - name: Chmod for binary + run: chmod +x ${SCCACHE_PATH} + + - name: Test + run: cargo clean && cargo build + + - name: Output + run: | + ${SCCACHE_PATH} --show-stats + + ${SCCACHE_PATH} --show-stats | grep gha + + - name: Test Twice for Cache Read + run: cargo clean && cargo build + + - name: Output + run: | + ${SCCACHE_PATH} --show-stats + + ${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]" + memcached-deprecated: runs-on: ubuntu-latest needs: build @@ -576,7 +626,7 @@ jobs: ${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]" hip: - # Probably wouldn't matter anyway since we run in a container, but staying + # Probably wouldn't matter anyway since we run in a container, but staying # close to the version is better than not. runs-on: ubuntu-22.04 needs: build diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 9d61788c8..aa6cd5778 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -591,12 +591,24 @@ pub fn storage_from_config( return Ok(Arc::new(storage)); } #[cfg(feature = "gha")] - CacheType::GHA(config::GHACacheConfig { ref version, .. }) => { - debug!("Init gha cache with version {version}"); - - let storage = GHACache::build(version) - .map_err(|err| anyhow!("create gha cache failed: {err:?}"))?; - return Ok(Arc::new(storage)); + CacheType::GHA(config::GHACacheConfig { + ref version, + as_local, + .. 
+ }) => { + if *as_local { + debug!("Init gha as local cache"); + let downloaded_path = pool + .block_on(GHACache::download_to_local(config, version)) + .map_err(|err| anyhow!("download gha cache as local failed: {err:?}"))?; + let storage = disk_cache_from_config(config, pool, downloaded_path)?; + return Ok(storage); + } else { + debug!("Init gha cache with version {version}"); + let storage = GHACache::build(version) + .map_err(|err| anyhow!("create gha cache failed: {err:?}"))?; + return Ok(Arc::new(storage)); + } } #[cfg(feature = "memcached")] CacheType::Memcached(config::MemcachedCacheConfig { @@ -724,7 +736,21 @@ pub fn storage_from_config( } } - let (dir, size) = (&config.fallback_cache.dir, config.fallback_cache.size); + disk_cache_from_config(config, pool, None) +} + +fn disk_cache_from_config( + config: &Config, + pool: &tokio::runtime::Handle, + root_override: Option<PathBuf>, +) -> Result<Arc<dyn Storage>> { + let (mut dir, size) = ( + config.fallback_cache.dir.to_owned(), + config.fallback_cache.size, + ); + if let Some(new_root) = root_override { + dir = dir.join(new_root); + } let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode; let rw_mode = config.fallback_cache.rw_mode.into(); debug!("Init disk cache with dir {:?}, size {}", dir, size); @@ -737,6 +763,27 @@ pub fn storage_from_config( ))) } +#[cfg(feature = "gha")] +pub async fn upload_local_cache(config: &Config) -> Result<()> { + match &config.cache { + Some(CacheType::GHA(gha_config)) => { + if !gha_config.enabled { + debug!("GHA cache is disabled in config"); + return Ok(()); + } + if !gha_config.as_local { + debug!("GHA not configured `as_local`"); + return Ok(()); + } + GHACache::upload_local_cache(config).await + } + _ => { + debug!("Uploading the local cache is only possible when using GitHub Actions"); + Ok(()) + } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/src/cache/gha.rs b/src/cache/gha.rs index 8d8373430..83eff345d 100644 --- a/src/cache/gha.rs +++ 
b/src/cache/gha.rs @@ -12,13 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::path::PathBuf; + use opendal::layers::LoggingLayer; use opendal::services::Ghac; use opendal::Operator; +use crate::config::Config; use crate::errors::*; use crate::VERSION; +const FULL_GHA_CACHE_ROOT: &str = "sccache-full"; + /// A cache that stores entries in GHA Cache Services. pub struct GHACache; @@ -43,4 +48,141 @@ impl GHACache { .finish(); Ok(op) } + + /// Download a copy of the entire GHA cache from the given version + /// and return the path to the root folder on the local disk. + /// + /// It is the user's responsibility to split the caches according + /// to anything relevant like architecture, OS, etc. by using the `version`. + pub async fn download_to_local(config: &Config, version: &str) -> Result<Option<PathBuf>> { + let tarball_path = local_cache_tarball_path(config); + let mut builder = Ghac::default(); + + // TODO somehow loop over decreasingly "fresh" versions of the cache + // like in + // https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#matching-a-cache-key + // For now the behavior is to match the same version, which would + // speed up rebuilds in the same (Git) branch. + // + // A few things to note that make this difficult, plus ideas: + // - GHA's cache is immutable (meaning you cannot modify a given path + // for a given version), so we cannot reuse a "global version" + // - GHA's cache doesn't allow for listing items in a version + // - GHA's cache is not shared across branches, except for branches + // that are directly from the default branch, which can use the + // default cache. + // - Maybe only using the default branch cache with a way of renewing + // it periodically is already a benefit. + // - This maybe could be done as a fallback if the current branch cache + // is empty, though this is unclear to me at the time of writing. 
+ if version.is_empty() { + builder.version(&format!("sccache-v{VERSION}")); + } else { + builder.version(&format!("sccache-v{VERSION}-{version}")); + } + + let op = Operator::new(builder)? + .layer(LoggingLayer::default()) + .finish(); + + if !op.is_exist(FULL_GHA_CACHE_ROOT).await? { + info!("Remote full gha cache does not exist: nothing to do"); + return Ok(None); + } + debug!("Found full gha cache"); + + let mut reader = op.reader(FULL_GHA_CACHE_ROOT).await?; + std::fs::create_dir_all(tarball_path.parent().expect("root path"))?; + + let mut writer = tokio::fs::OpenOptions::new() + .write(true) + .create(true) + .open(&tarball_path) + .await + .context("opening the local tarball for writing")?; + + if let Err(error) = tokio::io::copy(&mut reader, &mut writer).await { + match error.kind() { + std::io::ErrorKind::NotFound => { + debug!("Remote full gha cache was deleted: nothing to do"); + // TOCTOU race with the above existence check and the cache + // being cleared. + return Ok(None); + } + _ => { + bail!(error) + } + } + }; + + let cache = local_cache_path(config); + let tarball = + std::fs::File::open(tarball_path).context("Failed to open the GHA cache tarball")?; + tar::Archive::new(tarball) + .unpack(&cache) + .context("Failed to extract the GHA cache tarball")?; + + Ok(Some(cache)) + } + + /// Upload a tarball of the local cache + pub async fn upload_local_cache(config: &Config) -> Result<()> { + let cache = local_cache_path(config); + if !cache.exists() { + info!("Local cache does not exist: nothing to do"); + return Ok(()); + } + debug!("Found local gha cache at {}", cache.display()); + + let op = Operator::new(Ghac::default())? + .layer(LoggingLayer::default()) + .finish(); + + // GHA cache is immutable, if the path has already been written within + // a given version, it cannot be changed again. + if op.is_exist(FULL_GHA_CACHE_ROOT).await? 
{ + info!("Remote cache of this version already exists, cannot upload"); + return Ok(()); + } + + let mut tar_builder = tar::Builder::new(vec![]); + tar_builder + .append_dir_all(local_cache_path(config), ".") + .context("Failed to create GHA local cache tarball")?; + let source = local_cache_tarball_path(config); + std::fs::write(&source, tar_builder.into_inner()?) + .context("Failed to write the GHA local cache tarball to disk")?; + + let mut writer = op + .writer(FULL_GHA_CACHE_ROOT) + .await + .context("opening the remote tarball for writing")?; + + let mut reader = tokio::fs::File::open(&source) + .await + .context("opening the local tarball for reading")?; + + if let Err(error) = tokio::io::copy(&mut reader, &mut writer).await { + match error.kind() { + std::io::ErrorKind::AlreadyExists => { + debug!("Remote cache of this version raced us, cannot upload"); + // TOCTOU race with the above existence check and the cache + // being uploaded by another worker. + return Ok(()); + } + _ => bail!(error), + } + } + Ok(()) + } +} + +fn local_cache_tarball_path(config: &Config) -> PathBuf { + let mut path = config.fallback_cache.dir.join(FULL_GHA_CACHE_ROOT); + path.set_extension(".tar"); + path +} + +fn local_cache_path(config: &Config) -> PathBuf { + config.fallback_cache.dir.join(FULL_GHA_CACHE_ROOT) } diff --git a/src/config.rs b/src/config.rs index 4173bcaac..6ebd7f451 100644 --- a/src/config.rs +++ b/src/config.rs @@ -213,6 +213,11 @@ pub struct GHACacheConfig { /// Version for gha cache is a namespace. By setting different versions, /// we can avoid mixed caches. pub version: String, + /// Download the entire cache to be used like a local cache, then upload + /// it back if anything changed. + /// This is useful in CI contexts to reduce the number of requests, + /// hence avoiding rate limiting and improving overall cache speed. 
+ pub as_local: bool, } /// Memcached's default value of expiration is 10800s (3 hours), which is too @@ -784,12 +789,13 @@ fn config_from_env() -> Result { }); // ======= GHA ======= - let gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") { + let mut gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") { // If SCCACHE_GHA_VERSION has been set, we don't need to check // SCCACHE_GHA_ENABLED's value anymore. Some(GHACacheConfig { enabled: true, version, + as_local: false, }) } else if bool_from_env_var("SCCACHE_GHA_ENABLED")?.unwrap_or(false) { // If only SCCACHE_GHA_ENABLED has been set to the true value, enable with @@ -797,11 +803,16 @@ fn config_from_env() -> Result { Some(GHACacheConfig { enabled: true, version: "".to_string(), + as_local: false, }) } else { None }; + if let Some(gha) = &mut gha { + gha.as_local = bool_from_env_var("SCCACHE_GHA_AS_LOCAL")?.unwrap_or(false); + } + // ======= Azure ======= let azure = if let (Ok(connection_string), Ok(container)) = ( env::var("SCCACHE_AZURE_CONNECTION_STRING"), @@ -1453,6 +1464,7 @@ service_account = "example_service_account" [cache.gha] enabled = true version = "sccache" +as_local = false [cache.memcached] # Deprecated alias for `endpoint` @@ -1519,7 +1531,8 @@ no_credentials = true }), gha: Some(GHACacheConfig { enabled: true, - version: "sccache".to_string() + version: "sccache".to_string(), + as_local: false, }), redis: Some(RedisCacheConfig { url: Some("redis://user:passwd@1.2.3.4:6379/?db=1".to_owned()), diff --git a/src/server.rs b/src/server.rs index 70dc4f859..47d061f57 100644 --- a/src/server.rs +++ b/src/server.rs @@ -13,6 +13,8 @@ // limitations under the License.SCCACHE_MAX_FRAME_LENGTH use crate::cache::readonly::ReadOnlyStorage; +#[cfg(feature = "gha")] +use crate::cache::upload_local_cache; use crate::cache::{storage_from_config, CacheMode, Storage}; use crate::compiler::{ get_compiler_info, CacheControl, CompileResult, Compiler, CompilerArguments, CompilerHasher, @@ -466,7 +468,6 @@ 
pub fn start_server(config: &Config, port: u16) -> Result<()> { CacheMode::ReadOnly => Arc::new(ReadOnlyStorage(raw_storage)), _ => raw_storage, }; - let res = SccacheServer::<ProcessCommandCreator>::new(port, runtime, client, dist_client, storage); match res { @@ -474,7 +475,12 @@ pub fn start_server(config: &Config, port: u16) -> Result<()> { let port = srv.port(); info!("server started, listening on port {}", port); notify_server_startup(&notify, ServerStartup::Ok { port })?; - srv.run(future::pending::<()>())?; + let runtime = srv.run(future::pending::<()>())?; + + // TODO How do we propagate this error to the client? + #[cfg(feature = "gha")] + runtime.block_on(upload_local_cache(config))?; + Ok(()) } Err(e) => { @@ -589,7 +595,7 @@ impl SccacheServer { /// If the `shutdown` future resolves then the server will be shut down, /// otherwise the server may naturally shut down if it becomes idle for too /// long anyway. - pub fn run<F>(self, shutdown: F) -> io::Result<()> + pub fn run<F>(self, shutdown: F) -> io::Result<Runtime> where F: Future, C: Send, @@ -676,7 +682,7 @@ impl SccacheServer { info!("ok, fully shutting down now"); - Ok(()) + Ok(runtime) } }