Skip to content

Commit

Permalink
restrict number of processed files by features.gdcMafMaxFileSize
Browse files Browse the repository at this point in the history
  • Loading branch information
xzhou82 committed Oct 27, 2023
1 parent 6c13aca commit bd07073
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 9 deletions.
2 changes: 1 addition & 1 deletion client/src/gdc.maf.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ async function getFilesAndShowTable(obj) {
let data
try {
data = await dofetch3('gdc/mafBuild', { body: { fileIdLst } })
if (data.error) throw data.error
} catch (e) {
// do not proceed upon err
sayerror(holder, e)
return
}
Expand Down
53 changes: 45 additions & 8 deletions server/routes/gdc.mafBuild.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
//import { GdcMafBuildResponse } from '#shared/types/routes/gdc.mafBuild.ts'
import got from 'got'
import path from 'path'
import fs from 'fs'
import { run_rust } from '@sjcrh/proteinpaint-rust'
import serverconfig from '#src/serverconfig.js'

// GDC REST API host; override via PP_GDC_HOST env var (e.g. to target a staging host)
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
// cap on the cumulative compressed size (bytes) of input MAF files per request;
// configurable via serverconfig.features.gdcMafMaxFileSize, defaults to 50 MB
const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 50000000 // 50Mb

export const api = {
endpoint: 'gdc/mafBuild',
Expand All @@ -14,12 +15,8 @@ export const api = {
return async (req: any, res: any): Promise<void> => {
try {
await buildMaf(req, res)
/*
const result = await buildMaf(req)
const payload = { result } as GdcMafBuildResponse
res.send(payload)
*/
} catch (e) {
if (e.stack) console.log(e.stack)
res.send({ status: 'error', error: e.message || e })
}
}
Expand All @@ -45,12 +42,12 @@ req.query {
res{}
*/
async function buildMaf(req: any, res: any) {
if (!Array.isArray(req.query.fileIdLst) || req.query.fileIdLst.length == 0) throw 'fileIdLst[] not array or blank'
const fileLst2 = (await getFileLstUnderSizeLimit(req.query.fileIdLst)) as string[]

const outFile = path.join(serverconfig.cachedir, 'gdcMaf.' + Math.random().toString()) // should be a gzipped file. does it need to end with '.gz' or it's auto-added?

const arg = {
fileIdLst: req.query.fileIdLst,
fileIdLst: fileLst2,
host: path.join(apihost, 'data'), // must use the /data/ endpoint from current host
outFile
}
Expand All @@ -59,10 +56,50 @@ async function buildMaf(req: any, res: any) {

const data = await fs.promises.readFile(outFile)

// by directly returning a blob, it won't tell client how many files are used

res.writeHead(200, {
'Content-Type': 'application/octet-stream',
'Content-Disposition': 'attachment; filename=cohort.maf.gz',
'Content-Length': data.length
})
res.end(Buffer.from(data, 'binary'))
}

/*
Query the GDC API for the size of each requested MAF file, and keep only a
leading subset of files whose cumulative compressed size stays under
maxTotalSizeCompressed; excess files are dropped (not processed) so an
oversized request cannot crash the server.

File sizes must never be taken from the client request, as those can be
spoofed and must never be trusted; querying the API for authoritative
sizes is inexpensive.

lst: array of GDC file uuids, must be non-empty
returns: array of accepted file uuids (at least one)
throws: string messages, per this file's error convention
*/
async function getFileLstUnderSizeLimit(lst: string[]) {
	if (lst.length == 0) throw 'fileIdLst[] not array or blank'
	const data = {
		filters: {
			op: 'in',
			content: { field: 'file_id', value: lst }
		},
		// request one hit per requested file; a fixed cap (previously 1000)
		// would silently drop any files beyond the cap
		size: lst.length,
		fields: 'file_size'
	}
	const headers = { 'Content-Type': 'application/json', Accept: 'application/json' }
	// NOTE(review): path.join collapses "https://" to "https:/"; WHATWG url
	// parsing tolerates this for http(s) schemes, but a plain string join
	// (`${apihost}/files`) would be safer — kept for consistency with the
	// rest of this file. TODO confirm and change file-wide.
	const response = await got.post(path.join(apihost, 'files'), { headers, body: JSON.stringify(data) })
	let re
	try {
		re = JSON.parse(response.body)
	} catch (e) {
		throw 'invalid json from getFileLstUnderSizeLimit'
	}
	if (!Array.isArray(re.data?.hits)) throw 're.data.hits[] not array'
	const out = [] as string[]
	let cumsize = 0
	for (const h of re.data.hits) {
		// check before adding: the first file is always accepted even if it
		// alone exceeds the limit, so at least one file is processed
		if (cumsize >= maxTotalSizeCompressed) break // maxed out
		if (!h.id) throw '.id missing'
		if (!Number.isInteger(h.file_size)) throw '.file_size not integer'
		cumsize += h.file_size
		out.push(h.id)
	}
	if (out.length == 0) throw 'no file available'
	return out
}

0 comments on commit bd07073

Please sign in to comment.