From 8730ed58f6c7630d63e86bcdc9a755d8904d3df0 Mon Sep 17 00:00:00 2001
From: NickOvt
Date: Thu, 26 Sep 2024 11:56:36 +0300
Subject: [PATCH] fix(api-attachment): Calculate file content hash when uploading attachment ZMS-172 (#733)

* Added submission api endpoint to api docs generation

* on attachment upload calculate file content hash

* make stream into separate file, refactor

* create stream during the try to store. otherwise getting stuck

* refactor file content hash update

* safer file content hash handling

* refactor code. Fix possible race condition

* refactor function. Pass callback as last function param
---
Review notes (commentary only; not part of the commit message or of the diff):

 * updateFileWithContentHashMetadata() sets `metadata.fileContentHash` on the
   document in the `this.bucketName + '.files'` collection whose `_id` equals
   `hash`, then calls `callback(...args)` without looking at the update's
   error or result. `args` is forwarded unchanged, so the callback also
   receives the content hash as its third argument.
 * attachmentCallback() treats a third argument as the calculated content
   hash; the store's 'finish' handler supplies it from
   `fileHashCalculator.hash`.
 * NOTE(review): the hunk at -137,7 +162,11 removes the `callback(...args)`
   call from the `releaseLock` handler, and the replacement branch only runs
   when `calculatedFileContentHash` is truthy. Within the lines shown, nothing
   invokes `callback` on that path when no hash was passed (for example
   `attachmentCallback(err)`), whereas the unlocked path keeps its
   `callback(...args)` fallthrough. The lines between the hunks are not
   visible here; confirm against the full file.
 * FileHashCalculatorStream is a Transform that feeds every non-empty chunk
   into a SHA-256 hash and pushes the same chunk onward. Its `hash` property
   stays `null` until `_flush` runs, at which point it becomes the base64
   digest.

 lib/attachments/gridstore-storage.js | 45 +++++++++++++++++++++++++---
 lib/filehash-stream.js               | 38 +++++++++++++++++++++++
 2 files changed, 79 insertions(+), 4 deletions(-)
 create mode 100644 lib/filehash-stream.js

diff --git a/lib/attachments/gridstore-storage.js b/lib/attachments/gridstore-storage.js
index 87bb5020..d3fa858e 100644
--- a/lib/attachments/gridstore-storage.js
+++ b/lib/attachments/gridstore-storage.js
@@ -7,6 +7,7 @@ const errors = require('../errors');
 const log = require('npmlog');
 const crypto = require('crypto');
 const base64Offset = require('./base64-offset');
+const FileHashCalculatorStream = require('../filehash-stream');
 
 // Set to false to disable base64 decoding feature
 const FEATURE_DECODE_ATTACHMENTS = true;
@@ -32,6 +33,23 @@ class GridstoreStorage {
         });
     }
 
+    updateFileWithContentHashMetadata(args, hash, calculatedFileContentHash, callback) {
+        this.gridfs.collection(this.bucketName + '.files').findOneAndUpdate(
+            {
+                _id: hash
+            },
+            {
+                $set: {
+                    'metadata.fileContentHash': calculatedFileContentHash
+                }
+            },
+            {
+                returnDocument: 'after'
+            },
+            () => callback(...args) // do not really care about error here. If error then highly likely the file has not been uploaded either
+        );
+    }
+
     async get(attachmentId) {
         let attachmentData = await this.gridfs.collection(this.bucketName + '.files').findOne({
             _id: attachmentId
@@ -129,6 +147,13 @@ class GridstoreStorage {
         let storeLock;
 
         let attachmentCallback = (...args) => {
+            // store finished uploading, add the hash of the file contents to file metadata
+            let calculatedFileContentHash;
+
+            if (args.length > 2) {
+                calculatedFileContentHash = args[2];
+            }
+
             if (storeLock) {
                 log.silly('GridStore', '[%s] UNLOCK lock=%s status=%s', instance, lockId, storeLock.success ? 'locked' : 'empty');
                 if (storeLock.success) {
@@ -137,7 +162,11 @@ class GridstoreStorage {
                             // might be already finished if retrying after delay
                             return;
                         }
-                        callback(...args);
+                        if (calculatedFileContentHash) {
+                            // locked upload, new file
+                            this.updateFileWithContentHashMetadata(args, hash, calculatedFileContentHash, callback);
+                            return; // return from attachmentCallback. Top level callback will be ran after hash update
+                        }
                     });
                     // unset variable to prevent double releasing
                     storeLock = false;
@@ -149,6 +178,11 @@ class GridstoreStorage {
                 // might be already finished if retrying after delay
                 return;
             }
+            if (calculatedFileContentHash) {
+                // no lock upload, new file
+                this.updateFileWithContentHashMetadata(args, hash, calculatedFileContentHash, callback);
+                return; // return from attachmentCallback. Top level callback will be ran after hash update
+            }
             callback(...args);
         };
 
@@ -159,6 +193,8 @@ class GridstoreStorage {
                 return;
             }
 
+            let fileHashCalculator = new FileHashCalculatorStream();
+
             this.gridfs.collection(this.bucketName + '.files').findOneAndUpdate(
                 {
                     _id: hash
@@ -282,13 +318,14 @@ class GridstoreStorage {
                         attachmentCallback(err);
                     });
 
-                    store.once('finish', () => attachmentCallback(null, id));
+                    store.once('finish', () => attachmentCallback(null, id, fileHashCalculator.hash));
 
                     if (!metadata.decoded) {
-                        store.end(attachment.body);
+                        fileHashCalculator.pipe(store);
+                        fileHashCalculator.end(attachment.body);
                     } else {
                         let decoder = new libbase64.Decoder();
-                        decoder.pipe(store);
+                        decoder.pipe(fileHashCalculator).pipe(store);
                         decoder.once('error', err => {
                             // pass error forward
                             store.emit('error', err);
diff --git a/lib/filehash-stream.js b/lib/filehash-stream.js
new file mode 100644
index 00000000..514c1154
--- /dev/null
+++ b/lib/filehash-stream.js
@@ -0,0 +1,38 @@
+'use strict';
+
+const Transform = require('stream').Transform;
+const crypto = require('crypto');
+
+class FileHashCalculatorStream extends Transform {
+    constructor(options) {
+        super(options);
+        this.bodyHash = crypto.createHash('sha256');
+        this.hash = null;
+    }
+
+    updateHash(chunk) {
+        this.bodyHash.update(chunk);
+    }
+
+    _transform(chunk, encoding, callback) {
+        if (!chunk || !chunk.length) {
+            return callback();
+        }
+
+        if (typeof chunk === 'string') {
+            chunk = Buffer.from(chunk, encoding);
+        }
+
+        this.updateHash(chunk);
+        this.push(chunk);
+
+        callback();
+    }
+
+    _flush(done) {
+        this.hash = this.bodyHash.digest('base64');
+        done();
+    }
+}
+
+module.exports = FileHashCalculatorStream;