Skip to content

Commit

Permalink
feat(functions): functions dlp redact
Browse files Browse the repository at this point in the history
  • Loading branch information
Aaron Teo committed Feb 12, 2024
1 parent 3bdd65a commit 1d72915
Show file tree
Hide file tree
Showing 10 changed files with 480 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ module.exports = {
plugins: ['@typescript-eslint', 'import'],
rules: {
'semi': ['error', 'always'],
'indent': ['error', 2],
'indent': ['error', 2, { SwitchCase: 1 }],
'quotes': ['error', 'single'],
'camelcase': 0,
'linebreak-style': ['error', 'unix'],
Expand Down
1 change: 0 additions & 1 deletion packages/functions/.env.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
REKOGNITION_ACCESS_KEY_ID=<RETRIEVE_FROM_AWS_USER>
REKOGNITION_SECRET_ACCESS_KEY=<RETRIEVE_FROM_AWS_USER>
VIRUSTOTAL_API_KEY=<RETRIEVE_FROM_VIRUSTOTAL_DASHBOARD>
1 change: 1 addition & 0 deletions packages/functions/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
},
"dependencies": {
"@aws-sdk/client-rekognition": "^3.484.0",
"@google-cloud/dlp": "^5.1.1",
"firebase-admin": "^11.11.1",
"firebase-functions": "^4.3.1"
},
Expand Down
1 change: 1 addition & 0 deletions packages/functions/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ export * from './https/onCall/rekognition/getAuthToken';
export * from './https/onCall/file/getPassword';

export * from './storage/onObjectFinalized/user/onHeadshotUpload';
export * from './storage/onObjectFinalized/file/onFileUploadRedact';
export * from './storage/onObjectFinalized/file/onFileUploadEncrypt';
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ export const storage_onObjectFinalized_file_onFileUploadEncrypt = onObjectFinali

const fileName = basename(name);
const fileOwner = basename(dirname(name));
if (fileName.startsWith('enc_')) return logger.log(`File ${fileName} is already encrypted!`);
if (!fileName.startsWith('red_')) return logger.log(`File ${fileName} is not redacted yet!`);
if (fileOwner === '.') return logger.log(`File ${fileName} is not owned by any user!`);

const { name: fsFileName } = parse(fileName);
const fsFileRef = firestore.collection(FS_COLLECTION_FILES).doc(fsFileName);
const { name: fsFileName, base: fsFileBase } = parse(fileName);
const fsFileRef = firestore.collection(FS_COLLECTION_FILES).doc(fsFileName.replace('red_', ''));
const fsFileSnapshot = await fsFileRef.get();
const fsFileData = fsFileSnapshot.data() as FSFile;

Expand Down Expand Up @@ -76,11 +76,18 @@ export const storage_onObjectFinalized_file_onFileUploadEncrypt = onObjectFinali
csFileObject
);

const csFileEncName = `${fileOwner}/enc_${fileName}`;
const csFileOriName = `${fileOwner}/${fsFileBase.replace('red_', '')}`;
const csFileEncName = `${fileOwner}/enc_${fsFileBase.replace('red_', '')}`;

const csFileOriRef = bucketFiles.file(csFileOriName);
const csFileEncRef = bucketFiles.file(csFileEncName);

try {
await csFileEncRef.save(Buffer.from(fsFileEncObject));
await Promise.all([
csFileRef.delete(),
csFileOriRef.delete(),
csFileEncRef.save(Buffer.from(fsFileEncObject), { contentType }),
]);
} catch (error) {
logger.error(`Error saving encrypted file ${fileName}!`);
logger.error(error);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import type { FSFile } from '@armadillo/shared';
import type { google } from '@google-cloud/dlp/build/protos/protos';
import type { DocumentSnapshot, DocumentData } from 'firebase-admin/firestore';

import { DlpServiceClient } from '@google-cloud/dlp';
import {
BUCKET_FILES,
DLP_INFOTYPES_SENSITIVITY_MODERATE,
DLP_INFOTYPES_SENSITIVITY_HIGH,
} from '@armadillo/shared';

import { parse, basename, dirname } from 'path';

import { FieldValue } from 'firebase-admin/firestore';
import { logger } from 'firebase-functions/v2';
import { onObjectFinalized } from 'firebase-functions/v2/storage';

import { firestore, bucketFiles } from '../../../firebase';

export const storage_onObjectFinalized_file_onFileUploadRedact = onObjectFinalized(
{
bucket: BUCKET_FILES,
},
async ({ data }) => {
const { name, size, contentType } = data;

// Checks if the file uploaded is a directory or unknown file type
if (!contentType || contentType === 'application/octet-stream')
return logger.log(
`File ${name} is being ignored. It is either a folder creation, or does not have a valid content type!`
);

// Checks if the file uploaded is larger than 32MB
if (size > 32 * 33554432)
return logger.log(`File ${name} is being ignored. It is bigger than 32MB!`);

// Extracts the file name and extension
// E.g., /ownerid/filename.ext -> filename.ext
const fileName = basename(name);
const fileOwner = basename(dirname(name));

// Checks if the file is already redacted
if (fileName.startsWith('red_'))
return logger.log(`File ${name} is being ignored. It is already redacted!`);

// Checks if the file is already encrypted
if (fileName.startsWith('enc_'))
return logger.log(`File ${name} is being ignored. It is an encrypted file!`);

// Checks if the file is owned by any user
if (fileOwner === '.')
return logger.log(`File ${name} is being ignored. It is not owned by any user!`);

const { name: csFileName, base: csFileBase } = parse(fileName);
const csFileRef = bucketFiles.file(name);
const fsFileRef = firestore.collection('files').doc(csFileName);

let fsFileSnapshot: DocumentSnapshot<DocumentData>;
try {
fsFileSnapshot = await fsFileRef.get();
} catch (error) {
return logger.error(`Unable to retrieve file ${csFileName} from Firestore: ${error}`);
}

// Skip redaction process if file does not exist in Firestore
const fsFileExists = fsFileSnapshot.exists;
if (!fsFileExists) return logger.error(`File ${fsFileRef.path} does not exist in Firestore!`);

let fsFileData: FSFile;
try {
fsFileData = fsFileSnapshot.data() as FSFile;
} catch (error) {
return logger.error(`Unable to retrieve data from file ${csFileName} in Firestore: ${error}`);
}

let csFileExists: boolean;
try {
[csFileExists] = await csFileRef.exists();
if (!csFileExists)
return logger.error(
`Aborting redaction process. File ${csFileRef.name} does not exist in Cloud Storage!`
);
} catch (error) {
return logger.error(
`Unable to retrieve existence of file ${csFileRef.name} from Cloud Storage: ${error}`
);
}

// Skip redaction process if file is classified as TOP SECRET
if (fsFileData.file_classification === 'TOPSECRET') {
try {
await csFileRef.rename(`red_${csFileBase}`);
return logger.log(`File ${csFileName} is classified as TOP SECRET! Redaction skipped!`);
} catch (error) {
return logger.error(
`Unable to rename file ${csFileName} to redacted version in Cloud Storage: ${error}`
);
}
}

// Skip redaction process if file content type is not 'text/plain'
if (contentType !== 'text/plain') {
try {
await csFileRef.rename(`red_${csFileBase}`);
return logger.log(
`File ${csFileName} is not content type 'text/plain'! Redaction skipped!`
);
} catch (error) {
return logger.error(
`Unable to rename file ${csFileName} to redacted version in Cloud Storage: ${error}`
);
}
}

let csFileObject: Buffer;
try {
[csFileObject] = await csFileRef.download();
} catch (error) {
return logger.error(`Unable to download file ${csFileRef.name} from Cloud Storage: ${error}`);
}

const dlpClient = new DlpServiceClient();
const redactTypes: google.privacy.dlp.v2.IInfoType[] =
fsFileData.file_classification === 'SENSITIVE'
? [...DLP_INFOTYPES_SENSITIVITY_MODERATE]
: [...DLP_INFOTYPES_SENSITIVITY_MODERATE, ...DLP_INFOTYPES_SENSITIVITY_HIGH];

let dlpRedactResponse: google.privacy.dlp.v2.IDeidentifyContentResponse;
try {
[dlpRedactResponse] = await dlpClient.deidentifyContent({
parent: 'projects/it2566-armadillo/locations/global',
inspectConfig: {
infoTypes: redactTypes,
minLikelihood: 'VERY_UNLIKELY',
},
deidentifyConfig: {
infoTypeTransformations: {
transformations: [
{
infoTypes: redactTypes,
primitiveTransformation: {
replaceConfig: {
newValue: {
stringValue: '********',
},
},
},
},
],
},
},
item: {
byteItem: {
type: 'TEXT_UTF8',
data: csFileObject,
},
},
});
} catch (error) {
return logger.error(`Unable to redact file ${csFileName} using DLP: ${error}`);
}

const dlpRedactObject = dlpRedactResponse.item?.byteItem?.data;
if (!dlpRedactObject)
return logger.error(`Unable to retrieve redacted file ${csFileName} from DLP!`);

const dlpRedactObjectBuffer = Buffer.from(dlpRedactObject);
const csFileRedactName = `${fileOwner}/red_${csFileBase}`;
const csFileRedactRef = bucketFiles.file(csFileRedactName);

try {
await csFileRedactRef.save(dlpRedactObjectBuffer, { contentType });
} catch (error) {
logger.error(`Error saving redacted file ${csFileRedactName}!`);
return logger.error(error);
}

const fsFileUpdate: Partial<FSFile> = {
file_status: 'REDACTED',
updated_at: FieldValue.serverTimestamp(),
};

return fsFileRef.set(fsFileUpdate, { merge: true });
}
);
Loading

0 comments on commit 1d72915

Please sign in to comment.