From 9de85c277becb87f763ae228c888a6f41c6f80f3 Mon Sep 17 00:00:00 2001 From: Sanjay Babu Date: Fri, 1 Sep 2023 16:14:34 -0700 Subject: [PATCH 1/4] updating records with deleted documents --- api/database.json-sample | 8 ++ .../20230901215837-updateDeletedDocRecords.js | 75 +++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 api/database.json-sample create mode 100644 api/migrations/20230901215837-updateDeletedDocRecords.js diff --git a/api/database.json-sample b/api/database.json-sample new file mode 100644 index 000000000..2bc5df371 --- /dev/null +++ b/api/database.json-sample @@ -0,0 +1,8 @@ +{ + "defaultEnv": "local", + "local": { + "driver": "mongodb", + "database": "dbname", + "host": "localhost" + } +} diff --git a/api/migrations/20230901215837-updateDeletedDocRecords.js b/api/migrations/20230901215837-updateDeletedDocRecords.js new file mode 100644 index 000000000..308940358 --- /dev/null +++ b/api/migrations/20230901215837-updateDeletedDocRecords.js @@ -0,0 +1,75 @@ +'use strict'; + +var dbm; +var type; +var seed; + +/** + * We receive the dbmigrate dependency from dbmigrate initially. + * This enables us to not have to rely on NODE_PATH. + */ +exports.setup = function(options, seedLink) { + dbm = options.dbmigrate; + type = dbm.dataType; + seed = seedLink; +}; + +exports.up = async function(db) { + console.log('**** Updating Records with Deleted Attachments ****'); + + // Collection Names + const collectionName = 'redacted_record_subset'; + const nrptiCollectionName = 'nrpti'; + + const mClient = await db.connection.connect(db.connectionString, { + native_parser: true + }); + + + const collection = await mClient.collection(collectionName); + const nrptiCollection = await mClient.collection(nrptiCollectionName); + + // Subquery to find _id values with schema 'Document' in both collections + const subquery = [ + { _schemaName: 'Document' }, + { _schemaName: 'Document' } + ]; + + Promise.all([ + collection.find({ $or: subquery }, { _id: 1 }).toArray(), + nrptiCollection.find({ $or: subquery }, { _id: 1 }).toArray() + ]) + .then(([collectionResults, nrptiResults]) => { + // Extract _id values from the subquery results + const validIds = [...new Set([...collectionResults, ...nrptiResults].map(item => item._id))]; + + // Update documents to an empty array where _id is not in validIds + const updateQuery = { + _id: { $nin: validIds } + }; + + const updateOperation = { + $set: { + documents: [] + } + }; + + return collection.updateMany(updateQuery, updateOperation); + }) + .then(updateResult => { + console.log(`Updated ${updateResult.modifiedCount} documents in the ${collectionName} collection.`); + }) + .catch(err => { + }); + + mClient.close(); + +} + +exports.down = function(db) { + return null; +}; + +exports._meta = { + "version": 1 +}; From af1f999680cb973d53476dc9f463074758375ac6 Mon Sep 17 00:00:00 2001 From: Sanjay Babu Date: Wed, 6 Sep 2023 08:27:14 -0700 Subject: [PATCH 2/4] update records --- .../20230901215837-updateDeletedDocRecords.js | 90 +++++++++++++------ 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/api/migrations/20230901215837-updateDeletedDocRecords.js b/api/migrations/20230901215837-updateDeletedDocRecords.js index 308940358..b9cbb8cd4 100644 --- a/api/migrations/20230901215837-updateDeletedDocRecords.js +++ b/api/migrations/20230901215837-updateDeletedDocRecords.js @@ -4,6 +4,7 @@ var dbm; var type; var seed; + /** * We receive the dbmigrate dependency from dbmigrate initially. * This enables us to not have to rely on NODE_PATH. @@ -14,55 +15,86 @@ exports.setup = function(options, seedLink) { seed = seedLink; }; + exports.up = async function(db) { console.log('**** Updating Records with Deleted Attachments ****'); - - // Collection Names - const collectionName = 'redacted_record_subset'; - const nrptiCollectionName = 'nrpti'; - const mClient = await db.connection.connect(db.connectionString, { native_parser: true }); + try { + // Collection Names + const redactedCollectionName = 'redacted_record_subset'; + const nrptiCollectionName = 'nrpti'; + + - const collection = await mClient.collection(collectionName); + const redactedCollection = await mClient.collection(redactedCollectionName); const nrptiCollection = await mClient.collection(nrptiCollectionName); + + // Subquery to find _id values with schema 'Document' in both collections const subquery = [ { _schemaName: 'Document' }, { _schemaName: 'Document' } ]; - Promise.all([ - collection.find({ $or: subquery }, { _id: 1 }).toArray(), - nrptiCollection.find({ $or: subquery }, { _id: 1 }).toArray() - ]) - .then(([collectionResults, nrptiResults]) => { + const collectionResults = await redactedCollection.find({ $or: subquery }, { _id: 1 }).toArray(); + + const nrptiResults = await nrptiCollection.find({ $or: subquery }, { _id: 1 }).toArray(); + + + + console.log('inthen>>>>>') // Extract _id values from the subquery results - const validIds = [...new Set([...collectionResults, ...nrptiResults].map(item => item._id))]; + const validIds = [...new Set([...collectionResults, ...nrptiResults].map(item => item._id))]; + + let redactedDocumentsIds = await redactedCollection.find({_schemaName: 'Document'}).toArray(); + redactedDocumentsIds=redactedDocumentsIds.map(item => item._id); + + console.log('validIds= ' + validIds.length) - // Update documents to an empty array where _id is not in validIds - const updateQuery = { - _id: { $nin: validIds } - }; + + const cursor = redactedCollection.find({ + "documents": { "$exists": true, "$not": { "$size": 0 } } + }); // You can specify a filter to narrow down the documents - const updateOperation = { - $set: { - documents: [] + +console.log('before_cursor') +let ct = 0; + await cursor.forEach(record => { + if(!redactedDocumentsIds.includes(record['documents'][0])) + { + console.log('in_if' + ct) + redactedCollection.updateOne( + {_id: record._id}, + {$set: {documents: []}} + ) } - }; - - return collection.updateMany(updateQuery, updateOperation); - }) - .then(updateResult => { - console.log(`Updated ${updateResult.modifiedCount} documents in the ${collectionName} collection.`); - }) - .catch(err => { - }); + else{ + console.log('in_else' + ct) + } + +ct++; + + + }, + () => { + console.log('in_completion') + // This is the completion callback, called when the iteration is complete + mClient.close(); // Close the MongoDB client connection + console.log('Done.'); + } + ); - mClient.close(); + + //mClient.close(); + } catch (err) { + console.log('Error:', err); + } + + // return null; } From 3b72997f11eab0761e94c71494feed2ac7726084 Mon Sep 17 00:00:00 2001 From: Sanjay Babu Date: Thu, 7 Sep 2023 15:13:30 -0700 Subject: [PATCH 3/4] update deleted doc records --- .../20230901215837-updateDeletedDocRecords.js | 45 ++++++++++++------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/api/migrations/20230901215837-updateDeletedDocRecords.js b/api/migrations/20230901215837-updateDeletedDocRecords.js index b9cbb8cd4..00bbc18ad 100644 --- a/api/migrations/20230901215837-updateDeletedDocRecords.js +++ b/api/migrations/20230901215837-updateDeletedDocRecords.js @@ -30,31 +30,35 @@ exports.up = async function(db) { const redactedCollection = await mClient.collection(redactedCollectionName); - const nrptiCollection = await mClient.collection(nrptiCollectionName); + //const nrptiCollection = await mClient.collection(nrptiCollectionName); // Subquery to find _id values with schema 'Document' in both collections - const subquery = [ - { _schemaName: 'Document' }, - { _schemaName: 'Document' } - ]; + // const subquery = [ + // { _schemaName: 'Document' }, + // { _schemaName: 'Document' } + // ]; - const collectionResults = await redactedCollection.find({ $or: subquery }, { _id: 1 }).toArray(); + // const collectionResults = await redactedCollection.find({ $or: subquery }, { _id: 1 }).toArray(); - const nrptiResults = await nrptiCollection.find({ $or: subquery }, { _id: 1 }).toArray(); + // const nrptiResults = await nrptiCollection.find({ $or: subquery }, { _id: 1 }).toArray(); - console.log('inthen>>>>>') + //console.log('inthen>>>>>') // Extract _id values from the subquery results - const validIds = [...new Set([...collectionResults, ...nrptiResults].map(item => item._id))]; + //const validIds = [...new Set([...collectionResults, ...nrptiResults].map(item => item._id))]; let redactedDocumentsIds = await redactedCollection.find({_schemaName: 'Document'}).toArray(); redactedDocumentsIds=redactedDocumentsIds.map(item => item._id); - console.log('validIds= ' + validIds.length) + //console.log('validIdsCount= ' + validIds.length) + console.log('redactedCount= ' + redactedDocumentsIds.length) + + console.log('redacted1=' + redactedDocumentsIds[0]); + console.log('redacted2=' + redactedDocumentsIds[1]); const cursor = redactedCollection.find({ "documents": { "$exists": true, "$not": { "$size": 0 } } @@ -66,16 +70,23 @@ let ct = 0; await cursor.forEach(record => { if(!redactedDocumentsIds.includes(record['documents'][0])) { - console.log('in_if' + ct) - redactedCollection.updateOne( - {_id: record._id}, - {$set: {documents: []}} - ) + console.log('in_if' + ct + 'documentid=' + record['documents'][0]) + // redactedCollection.updateOne( + // {_id: record._id}, + // {$set: {documents: []}} + // ) } else{ - console.log('in_else' + ct) + console.log('in_else' + ct) } - +// const foundElement = redactedDocumentsIds.some(item => item === record['documents'][0]); +// if (foundElement === undefined) { +// // Element not found +// // console.log('in_if' + ct + 'documentid=' + record['documents'][0]) +// } +// else{ +// console.log('in_else' + ct + 'documentid=' + record['documents'][0]) +// } ct++; From f222b195f9ab65b78e64b27da54d89e0fb3474df Mon Sep 17 00:00:00 2001 From: David <62899351+davidclaveau@users.noreply.github.com> Date: Fri, 8 Sep 2023 12:19:57 -0700 Subject: [PATCH 4/4] update migration to backcheck documents --- .../20230901215837-updateDeletedDocRecords.js | 129 ++++++++---------- 1 file changed, 54 insertions(+), 75 deletions(-) diff --git a/api/migrations/20230901215837-updateDeletedDocRecords.js b/api/migrations/20230901215837-updateDeletedDocRecords.js index 00bbc18ad..ba71fb449 100644 --- a/api/migrations/20230901215837-updateDeletedDocRecords.js +++ b/api/migrations/20230901215837-updateDeletedDocRecords.js @@ -1,10 +1,11 @@ 'use strict'; +const { ObjectId } = require('mongodb'); + var dbm; var type; var seed; - /** * We receive the dbmigrate dependency from dbmigrate initially. * This enables us to not have to rely on NODE_PATH. @@ -15,104 +16,82 @@ exports.setup = function(options, seedLink) { seed = seedLink; }; - exports.up = async function(db) { - console.log('**** Updating Records with Deleted Attachments ****'); + const mClient = await db.connection.connect(db.connectionString, { native_parser: true }); - try { - // Collection Names - const redactedCollectionName = 'redacted_record_subset'; - const nrptiCollectionName = 'nrpti'; - - - - - const redactedCollection = await mClient.collection(redactedCollectionName); - //const nrptiCollection = await mClient.collection(nrptiCollectionName); - - - // Subquery to find _id values with schema 'Document' in both collections - // const subquery = [ - // { _schemaName: 'Document' }, - // { _schemaName: 'Document' } - // ]; - - // const collectionResults = await redactedCollection.find({ $or: subquery }, { _id: 1 }).toArray(); + try { + console.log('**** Started tracking all documents in redacted_record_subset ****'); - // const nrptiResults = await nrptiCollection.find({ $or: subquery }, { _id: 1 }).toArray(); + const nrptiCollection = await mClient.collection('nrpti'); + const redactedRecordSubsetCollection = await mClient.collection('redacted_record_subset'); + // Find all documents with an existing, non-empty "documents" array in redacted_record_subset collection + const matchingDocuments = await redactedRecordSubsetCollection + .find({ + "documents": { $ne: [], $exists: true } + }) + .toArray(); + console.log('**** Found ' + matchingDocuments.length + ' documents with an existing, non-empty "documents" array in redacted_record_subset collection ****'); - //console.log('inthen>>>>>') - // Extract _id values from the subquery results - //const validIds = [...new Set([...collectionResults, ...nrptiResults].map(item => item._id))]; + // Take all ObjectIDs from these "documents" arrays and put them in a new array + const objectIDsArray = matchingDocuments.reduce((acc, doc) => { + acc.push(...doc.documents.map(id => new ObjectId(id))); + return acc; + }, []); - let redactedDocumentsIds = await redactedCollection.find({_schemaName: 'Document'}).toArray(); - redactedDocumentsIds=redactedDocumentsIds.map(item => item._id); + console.log('**** Found ' + objectIDsArray.length + ' ObjectIDs in the "documents" arrays ****'); - //console.log('validIdsCount= ' + validIds.length) + // Check if each ObjectID exists in the "nrpti" or "redacted_record_subset" collection + const matchingObjectIDs = []; + const nonMatchingObjectIDs = []; - console.log('redactedCount= ' + redactedDocumentsIds.length) + console.log('**** Checking if each ObjectID exists in the "nrpti" or "redacted_record_subset" collection ****'); - console.log('redacted1=' + redactedDocumentsIds[0]); - console.log('redacted2=' + redactedDocumentsIds[1]); - - const cursor = redactedCollection.find({ - "documents": { "$exists": true, "$not": { "$size": 0 } } - }); // You can specify a filter to narrow down the documents + for (const objectID of objectIDsArray) { + // Check in the nrpti collection + const nrptiDocument = await nrptiCollection.findOne({ _id: objectID, _schemaName: 'Document' }); + // Check in the redacted_record_subset collection + const redactedRecordSubsetDocument = await redactedRecordSubsetCollection.findOne({ _id: objectID, _schemaName: 'Document' }); -console.log('before_cursor') -let ct = 0; - await cursor.forEach(record => { - if(!redactedDocumentsIds.includes(record['documents'][0])) - { - console.log('in_if' + ct + 'documentid=' + record['documents'][0]) - // redactedCollection.updateOne( - // {_id: record._id}, - // {$set: {documents: []}} - // ) + if (nrptiDocument || redactedRecordSubsetDocument) { + matchingObjectIDs.push(objectID); + } else { + nonMatchingObjectIDs.push(objectID); } - else{ - console.log('in_else' + ct) } -// const foundElement = redactedDocumentsIds.some(item => item === record['documents'][0]); -// if (foundElement === undefined) { -// // Element not found -// // console.log('in_if' + ct + 'documentid=' + record['documents'][0]) -// } -// else{ -// console.log('in_else' + ct + 'documentid=' + record['documents'][0]) -// } -ct++; - - - }, - () => { - console.log('in_completion') - // This is the completion callback, called when the iteration is complete - mClient.close(); // Close the MongoDB client connection - console.log('Done.'); - } - ); + console.log('**** Found ' + matchingObjectIDs.length + ' matching ObjectIDs ****'); + console.log('**** Found ' + nonMatchingObjectIDs.length + ' non-matching ObjectIDs ****'); - //mClient.close(); - } catch (err) { - console.log('Error:', err); + console.log('**** Updating "documents" arrays in redacted_record_subset collection ****'); + + // Remove all non-matching ObjectIDs from the "documents" array in the redacted_record_subset collection + for (const nonMatchingObjectID of nonMatchingObjectIDs) { + // Update documents in the "redacted_record_subset" collection + const result = await redactedRecordSubsetCollection.updateMany( + { "documents": nonMatchingObjectID }, + { "$pull": { "documents": nonMatchingObjectID } } + ); + + console.log(`Removed ${result.modifiedCount} instances of ${nonMatchingObjectID} from "redacted_record_subset" collection.`); } + + } catch (error) { + console.error(`Migration did not complete. Error processing: ${error.message}`); + } finally { + mClient.close(); + } +}; - // return null; - -} - exports.down = function(db) { return null; }; exports._meta = { "version": 1 -}; +}; \ No newline at end of file