From 402429cf291074beb4ba824872604c379af8dd47 Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:19:19 +0100 Subject: [PATCH 1/8] Update README --- README.md | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8d9c3ea..4c5249c 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,88 @@ # archivist-utilities -Tools for users of archivist + +This is a tool that implements the functionality described at https://ucldata.atlassian.net/wiki/spaces/CLOS/pages/37323492/Using+Txt+Files + +The tool is accessible at https://closer-cohorts.github.io/archivist-utilities/ + +It accepts as input an Excel file (.xslx) + +The worksheet containing data from which the qv, tv and tq mappings are created MUST be the first worksheet in the file. This worksheet can have any name. + +The worksheet containing data from which the dv mappings are created MUST be the second worksheet in the file. This worksheet can have any name. + +# QV mappings file + +The following column headers must be present in the first worksheet in the input Excel file, in order to create the qv text files: + + - Questionnaire prefix + - Question name + - Dataset prefix + - Variable name + +The headers are not case sensitive, i.e. a column called 'question name' instead of 'Question Name' is acceptable. + +A separate qv text file will be created for each unique questionnaire prefix value in the first worksheet in the input Excel file. 
+ +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_qv.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Questionnaire prefix' column) + - heaf_17_fup5_qv.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Questionnaire prefix' column) + +# TV mappings file + +The following column headers must be present in the first worksheet in the input Excel file, in order to create the qv text files: + + - Dataset prefix + - Variable name + - Topic id + +The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable. + +A separate tv text file will be created for each unique dataset prefix value in the input Excel file. + +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_tv.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Dataset prefix' column) + - heaf_17_fup5_tv.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Dataset prefix' column) + +# TQ mappings file + +The following column headers must be present in the first worksheet in the input Excel file, in order to create the tq text files: + + - Questionnaire prefix + - Question name + - Topic id + +The headers are not case sensitive, i.e. a column called 'question name' instead of 'Question Name' is acceptable. + +A separate tq text file will be created for each unique questionnaire prefix value in the input Excel file. 
+ +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_tq.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Questionnaire prefix' column) + - heaf_17_fup5_tq.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Questionnaire prefix' column) + +# DV mappings file + +The following column headers must be present in the second worksheet in the input Excel file, in order to create the dv text files: + + - Dataset prefix + - Derived variable name + - Source variable name + + The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable. + +A separate dv text file will be created for each unique dataset prefix value in the input Excel file. + +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_dv.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Dataset prefix' column) + - heaf_17_fup5_dv.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Dataset prefix' column) + +# Text mapping file generation + +To generate the text mapping files for Archivist from an Excel XLSX file, simply: + +1. Select the Excel XLSX file from your local computer by pressing the 'Choose file' button +2. A 'Convert file' button will appear after you have selected an XLSX file. Press this button. +3. Links to the various text files generated from this XLSX file will now be displayed on the screen, and clicking on these will download the text files to your local computer. 
\ No newline at end of file From 5230401409def7828bbed1dc136837f4e3a52173 Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:25:12 +0100 Subject: [PATCH 2/8] Modify program so column headers in worksheets are no longer case-sensitive --- index.html | 75 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/index.html b/index.html index bc98f14..f3f37c2 100644 --- a/index.html +++ b/index.html @@ -38,6 +38,29 @@

Generate text mappings files

} + function columnNamesToLowerCase(worksheetDataByRows) { + + const rowValues = [] + + worksheetDataByRows.forEach(worksheetRow => { + + // We need to create a new object to hold the row data so we can ensure the + // field names of the object are all lower case... + + var rowData = {} + + // console.log(worksheetRow) + + Object.keys(worksheetRow).forEach(fieldName => rowData[fieldName.toLowerCase()] = worksheetRow[fieldName]) + + rowValues.push(rowData) + + }) + + return rowValues + + } + function createBlobURL(worksheetDataByRows, columns, suffix = {}) { var fileData = "" @@ -49,8 +72,6 @@

Generate text mappings files

var rowData = {} - Object.keys(worksheetRow).forEach(fieldName => rowData[fieldName.toLowerCase()] = worksheetRow[fieldName]) - const rowValues = [] columns.map(columnName => rowValues.push(rowData[columnName.toLowerCase()] @@ -108,16 +129,17 @@

Generate text mappings files

const qvTvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[0]] - const qvTvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" }); + const qvTvWorksheetOrderedByRows = columnNamesToLowerCase( + XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" })) var datasetPrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map( - worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell => + worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell => (!!datasetPrefixCell && !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell)) ))) var questionnairePrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map( - worksheetRow => worksheetRow['Questionnaire prefix']).filter(questionnairePrefixCell => + worksheetRow => worksheetRow['questionnaire prefix']).filter(questionnairePrefixCell => (!!questionnairePrefixCell && !(questionnairePrefixCell.includes("NA") || questionnairePrefixCell.includes("Derived") || !questionnairePrefixCell)) ))) @@ -125,9 +147,9 @@

Generate text mappings files

questionnairePrefixes.forEach(questionnairePrefix => { var qvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter( - worksheetRow => worksheetRow['Questionnaire prefix'] == questionnairePrefix), - ['Questionnaire prefix', 'Question Name', 'Dataset prefix', 'Variable Name'], - { 'Questionnaire prefix': '_ccs01' }) + worksheetRow => worksheetRow['questionnaire prefix'] == questionnairePrefix), + ['questionnaire prefix', 'question name', 'dataset prefix', 'variable name'], + { 'questionnaire prefix': '_ccs01' }) addLinkToPage(qvLinkURL, `${questionnairePrefix}_qv.txt`, @@ -139,8 +161,8 @@

Generate text mappings files

datasetPrefixes.forEach(datasetPrefix => { var tvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter( - worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix), - ['Dataset prefix', 'Variable Name', 'Topic ID']) + worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), + ['dataset prefix', 'variable name', 'topic id']) addLinkToPage(tvLinkURL, `${datasetPrefix}_tv.txt`, @@ -152,22 +174,22 @@

Generate text mappings files

simplifiedData = [] qvTvWorksheetOrderedByRows.forEach(worksheetRow => { - if (worksheetRow['Question Name'].includes("$")) { - indexOfDollarSign = worksheetRow['Question Name'].indexOf("$") - worksheetRow['Question Name'] = (worksheetRow['Question Name'].slice(0, indexOfDollarSign)) + if (worksheetRow['question name'].includes("$")) { + indexOfDollarSign = worksheetRow['question name'].indexOf("$") + worksheetRow['question name'] = (worksheetRow['question name'].slice(0, indexOfDollarSign)) } - if (simplifiedData.filter(dataRow => dataRow['Question Name'] == worksheetRow['Question Name'] - && dataRow['Topic ID'] == worksheetRow['Topic ID']).length == 0) + if (simplifiedData.filter(dataRow => dataRow['question name'] == worksheetRow['question name'] + && dataRow['topic id'] == worksheetRow['topic id']).length == 0) simplifiedData.push(worksheetRow) }) questionnairePrefixes.forEach(questionnairePrefix => { var tqLinkURL = createBlobURL(simplifiedData.filter( - dataRow => dataRow['Questionnaire prefix'] == questionnairePrefix), - ['Questionnaire prefix', 'Question Name', 'Topic ID'], - { 'Questionnaire prefix': '_ccs01' }) + dataRow => dataRow['questionnaire prefix'] == questionnairePrefix), + ['questionnaire prefix', 'question name', 'topic id'], + { 'questionnaire prefix': '_ccs01' }) addLinkToPage(tqLinkURL, `${questionnairePrefix}_tq.txt`, @@ -182,18 +204,20 @@

Generate text mappings files

const dvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" }); - datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows.map( - worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell => + const dvWorksheetOrderedByRows2 = columnNamesToLowerCase(dvWorksheetOrderedByRows) + + datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows2.map( + worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell => (!!datasetPrefixCell && !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell)) ))) datasetPrefixes.forEach(datasetPrefix => { - var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows.filter( - worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix), ["Dataset prefix", - "Derived Variable Name", - "Dataset prefix", - "Source Variable Name" + var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows2.filter( + worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), ["dataset prefix", + "derived variable name", + "dataset prefix", + "source variable name" ]) addLinkToPage(dvLinkURL, `${datasetPrefix}_dv.txt`, @@ -209,7 +233,6 @@

Generate text mappings files

- \ No newline at end of file From de1c6cd17120b11bcdb4e8142c0be86a04b32238 Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:32:17 +0100 Subject: [PATCH 3/8] Remove commented out line --- index.html | 2 -- 1 file changed, 2 deletions(-) diff --git a/index.html b/index.html index f3f37c2..0e32850 100644 --- a/index.html +++ b/index.html @@ -49,8 +49,6 @@

Generate text mappings files

var rowData = {} - // console.log(worksheetRow) - Object.keys(worksheetRow).forEach(fieldName => rowData[fieldName.toLowerCase()] = worksheetRow[fieldName]) rowValues.push(rowData) From 57ce8859607674df534bf97b03a886b2783ff228 Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:34:25 +0100 Subject: [PATCH 4/8] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c5249c..0cbbd55 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This is a tool that implements the functionality described at https://ucldata.at The tool is accessible at https://closer-cohorts.github.io/archivist-utilities/ -It accepts as input an Excel file (.xslx) +It accepts as input an Excel file (.xlsx) The worksheet containing data from which the qv, tv and tq mappings are created MUST be the first worksheet in the file. This worksheet can have any name. From 84c5a217e3d4e781f91abeaa82fb13d0900f04ec Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:36:26 +0100 Subject: [PATCH 5/8] fix text so it reads better --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0cbbd55..ba2f91a 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,8 @@ A separate qv text file will be created for each unique questionnaire prefix val For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: - - heaf_17_fup4_qv.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Questionnaire prefix' column) - - heaf_17_fup5_qv.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Questionnaire prefix' column) + - heaf_17_fup4_qv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column) + - 
heaf_17_fup5_qv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column) # TV mappings file @@ -42,8 +42,8 @@ A separate tv text file will be created for each unique dataset prefix value in For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: - - heaf_17_fup4_tv.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Dataset prefix' column) - - heaf_17_fup5_tv.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Dataset prefix' column) + - heaf_17_fup4_tv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column) + - heaf_17_fup5_tv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column) # TQ mappings file @@ -59,8 +59,8 @@ A separate tq text file will be created for each unique questionnaire prefix val For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: - - heaf_17_fup4_tq.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Questionnaire prefix' column) - - heaf_17_fup5_tq.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Questionnaire prefix' column) + - heaf_17_fup4_tq.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column) + - heaf_17_fup5_tq.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column) # DV mappings file @@ -76,8 +76,8 @@ A separate dv text file will be created for each unique dataset prefix value in For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows 
had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: - - heaf_17_fup4_dv.txt (containing all the rows which contained 'heaf_17_fup4' in the 'Dataset prefix' column) - - heaf_17_fup5_dv.txt (containing all the rows which contained 'heaf_17_fup5' in the 'Dataset prefix' column) + - heaf_17_fup4_dv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column) + - heaf_17_fup5_dv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column) # Text mapping file generation From d703a9517db963fb5b69389413b74967d0cb4777 Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:40:28 +0100 Subject: [PATCH 6/8] remove whitespace --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ba2f91a..692e90e 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ The following column headers must be present in the first worksheet in the input - Derived variable name - Source variable name - The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable. +The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable. A separate dv text file will be created for each unique dataset prefix value in the input Excel file. 
From 8850527e1a18b74fad8ed88f2dcbb159676b28bd Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:42:42 +0100 Subject: [PATCH 7/8] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 692e90e..e6a6f5f 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ For example, if some rows in the input worksheet had the value 'heaf_17_fup4' fo # TV mappings file -The following column headers must be present in the first worksheet in the input Excel file, in order to create the qv text files: +The following column headers must be present in the first worksheet in the input Excel file, in order to create the tv text files: - Dataset prefix - Variable name From 49bab8772a3d02de01ec66d3b3edfa5036e9b7a4 Mon Sep 17 00:00:00 2001 From: "o.lyttleton@ucl.ac.uk" Date: Thu, 25 Jul 2024 14:53:36 +0100 Subject: [PATCH 8/8] Tidied code --- index.html | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/index.html b/index.html index 0e32850..39551d8 100644 --- a/index.html +++ b/index.html @@ -65,14 +65,9 @@

Generate text mappings files

worksheetDataByRows.forEach(worksheetRow => { - // We need to create a new object to hold the row data so we can ensure the - // field names of the object are all lower case... - - var rowData = {} - const rowValues = [] - columns.map(columnName => rowValues.push(rowData[columnName.toLowerCase()] + columns.map(columnName => rowValues.push(worksheetRow[columnName.toLowerCase()] + (!!suffix[columnName] ? suffix[columnName] : ""))) if (!(rowValues.includes("NA") || rowValues.includes("Derived") || rowValues.includes(""))) @@ -200,18 +195,17 @@

Generate text mappings files

const dvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[1]] - const dvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" }); - - const dvWorksheetOrderedByRows2 = columnNamesToLowerCase(dvWorksheetOrderedByRows) + const dvWorksheetOrderedByRows = columnNamesToLowerCase( + XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" })) - datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows2.map( + datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows.map( worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell => (!!datasetPrefixCell && !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell)) ))) datasetPrefixes.forEach(datasetPrefix => { - var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows2.filter( + var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows.filter( worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), ["dataset prefix", "derived variable name", "dataset prefix",