Skip to content

Commit

Permalink
fix: on validating container, gdc logic will skip case caching and st…
Browse files Browse the repository at this point in the history
…ale cache check (#1950)
  • Loading branch information
xzhou82 authored Jul 26, 2024
1 parent 36e6375 commit bd2b2c8
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 39 deletions.
3 changes: 2 additions & 1 deletion release.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@

Fixes:
- on validating container, gdc logic will skip case caching and stale cache check
92 changes: 54 additions & 38 deletions server/src/termdb.gdc.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@ initGDCdictionary
assignDefaultBins
makeTermdbQueries
getOpenProjects
testGDCapi
testRestApi
testGraphqlApi
cacheSampleIdMapping
fetchIdsFromGdcApi
runRemainingWithoutAwait
testGDCapi
testRestApi
testGraphqlApi
mayCacheSampleIdMappingWithRegularCheck
cacheSampleIdMapping
fetchIdsFromGdcApi
mayReCacheCaseIdMapping
caseCacheIsStale
******************** major tasks *****************
Expand Down Expand Up @@ -751,17 +754,12 @@ async function runRemainingWithoutAwait(ds) {
await testGDCapi(ds)
try {
// obtain case id mapping for the first time and store at ds.__gdc
await cacheSampleIdMapping(ds)
await mayCacheSampleIdMappingWithRegularCheck(ds)
} catch (e) {
if (e.stack) console.log(e.stack)
throw 'cacheSampleIdMapping() failed: ' + (e.message || e)
}

if (!serverconfig.features.noPeriodicCheckGdcCaseCache) {
// cache check is allowed. this is required for gdc prod environment and can be disabled on dev env.
// kick off stale cache check
setTimeout(() => mayReCacheCaseIdMapping(ds), cacheCheckWait)
}
// add any other gdc stuff
}

async function getApiStatus(ds) {
Expand Down Expand Up @@ -921,16 +919,38 @@ async function testGraphqlApi(url, headers) {
console.log('GDC GraphQL API okay: ' + url, new Date() - t, 'ms')
}

/*
cache gdc sample id mappings
** this is an optional step and can be skipped on dev machines **
- create a map from sample aliquot id to sample submitter id, for displaying in mds3 tk
- create a map from different ids to case uuid, for creating gdc cohort with selected samples
- cache list of case uuids with expression data
async function mayCacheSampleIdMappingWithRegularCheck(ds) {
// may do it because it could be disabled by feature toggle
// caching action is fine-tuned by the feature toggle on a pp instance; log out detailed status per setting
const fv = serverconfig.features.stopGdcCacheAliquot
if (fv === true) {
// do not cache at all. this flag is auto-set for container validation. running stale cache check will cause the server process not to quit, and break validation, thus must skip this when flag is true
console.log('GDC: sample IDs are not cached! No periodic check will take place!')
initGdcHolder(ds) // though nothing is cached, must init the cache holder so not to break code that accesses this holder
///////////////////// NOTE ///////////////////////
// with missing cache for case id mapping and cases with exp data, any query using gdc gene exp data will not work!!
// this should be fine for container validation, but may not be acceptable on a dev environment
return
}
if (Number.isInteger(fv) && fv > 0) {
// flag value is integer. allow to run a short test on dev machine
console.log('GDC: running limited sample ID caching')
} else {
// for any other flag value (or not set), will cache everything. this should be on prod server
console.log('GDC: caching complete sample ID mapping')
}

function will rerun when it detects stale case id cache
*/
async function cacheSampleIdMapping(ds) {
await cacheSampleIdMapping(ds)

// regular check will only happen if mapping is actually cached
if (!serverconfig.features.noPeriodicCheckGdcCaseCache) {
// cache check is allowed. this is required for gdc prod environment and can be disabled on dev env.
// kick off stale cache check
setTimeout(() => mayReCacheCaseIdMapping(ds), cacheCheckWait)
}
}

function initGdcHolder(ds) {
// gather these arbitrary gdc stuff under __gdc{} to be safe
// do not freeze the object; they will be rewritten if cache is stale
ds.__gdc = {
Expand Down Expand Up @@ -965,6 +985,19 @@ async function cacheSampleIdMapping(ds) {
gdcOpenProjects: new Set(), // names of open-access projects
doneCaching: false
}
}

/*
cache gdc sample id mappings
** this is an optional step and can be skipped on dev machines **
- create a map from sample aliquot id to sample submitter id, for displaying in mds3 tk
- create a map from different ids to case uuid, for creating gdc cohort with selected samples
- cache list of case uuids with expression data
function will rerun when it detects stale case id cache
*/
async function cacheSampleIdMapping(ds) {
initGdcHolder(ds)

try {
await getOpenProjects(ds)
Expand All @@ -973,23 +1006,6 @@ async function cacheSampleIdMapping(ds) {
throw 'getOpenProjects() failed: ' + (e.message || e)
}

// caching action is fine-tuned by the feature toggle on a pp instance; log out detailed status per setting
if ('stopGdcCacheAliquot' in serverconfig.features) {
// flag is set
if (Number.isInteger(serverconfig.features.stopGdcCacheAliquot)) {
// flag value is integer (supposed to be a positive integer)
// allow to run a short test on dev machine
console.log('GDC: running limited sample ID caching')
} else {
// flag value is not integer, do not run this function at all
console.log('GDC: sample IDs are not cached!')
return
}
} else {
// flag not set; this should be on prod server
console.log('GDC: caching complete sample ID mapping')
}

const size = 1000 // fetch 1000 ids at a time
const totalCases = await fetchIdsFromGdcApi(ds, 1, 0)
if (!Number.isInteger(totalCases)) throw 'totalCases not integer'
Expand Down

0 comments on commit bd2b2c8

Please sign in to comment.