diff --git a/README.md b/README.md index 8d9c3ea..e6a6f5f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,88 @@ # archivist-utilities -Tools for users of archivist + +This is a tool that implements the functionality described at https://ucldata.atlassian.net/wiki/spaces/CLOS/pages/37323492/Using+Txt+Files + +The tool is accessible at https://closer-cohorts.github.io/archivist-utilities/ + +It accepts as input an Excel file (.xlsx) + +The worksheet containing data from which the qv, tv and tq mappings are created MUST be the first worksheet in the file. This worksheet can have any name. + +The worksheet containing data from which the dv mappings are created MUST be the second worksheet in the file. This worksheet can have any name. + +# QV mappings file + +The following column headers must be present in the first worksheet in the input Excel file, in order to create the qv text files: + + - Questionnaire prefix + - Question name + - Dataset prefix + - Variable name + +The headers are not case sensitive, i.e. a column called 'question name' instead of 'Question Name' is acceptable. + +A separate qv text file will be created for each unique questionnaire prefix value in the first worksheet in the input Excel file. + +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_qv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column) + - heaf_17_fup5_qv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column) + +# TV mappings file + +The following column headers must be present in the first worksheet in the input Excel file, in order to create the tv text files: + + - Dataset prefix + - Variable name + - Topic id + +The headers are not case sensitive, i.e. 
a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable. + +A separate tv text file will be created for each unique dataset prefix value in the input Excel file. + +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_tv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column) + - heaf_17_fup5_tv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column) + +# TQ mappings file + +The following column headers must be present in the first worksheet in the input Excel file, in order to create the tq text files: + + - Questionnaire prefix + - Question name + - Topic id + +The headers are not case sensitive, i.e. a column called 'question name' instead of 'Question Name' is acceptable. + +A separate tq text file will be created for each unique questionnaire prefix value in the input Excel file. + +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_tq.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column) + - heaf_17_fup5_tq.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column) + +# DV mappings file + +The following column headers must be present in the second worksheet in the input Excel file, in order to create the dv text files: + + - Dataset prefix + - Derived variable name + - Source variable name + +The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable. 
+ +A separate dv text file will be created for each unique dataset prefix value in the input Excel file. + +For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated: + + - heaf_17_fup4_dv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column) + - heaf_17_fup5_dv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column) + +# Text mapping file generation + +To generate the text mapping files for Archivist from an Excel XLSX file, simply: + +1. Select the Excel XLSX file from your local computer by pressing the 'Choose file' button +2. A 'Convert file' button will appear after you have selected an XLSX file. Press this button. +3. Links to the various text files generated from this XLSX file will now be displayed on the screen, and clicking on these will download the text files to your local computer. \ No newline at end of file diff --git a/index.html b/index.html index bc98f14..39551d8 100644 --- a/index.html +++ b/index.html @@ -38,10 +38,10 @@

Generate text mappings files

} - function createBlobURL(worksheetDataByRows, columns, suffix = {}) { - - var fileData = "" + function columnNamesToLowerCase(worksheetDataByRows) { + const rowValues = [] + worksheetDataByRows.forEach(worksheetRow => { // We need to create a new object to hold the row data so we can ensure the @@ -51,9 +51,23 @@

Generate text mappings files

Object.keys(worksheetRow).forEach(fieldName => rowData[fieldName.toLowerCase()] = worksheetRow[fieldName]) + rowValues.push(rowData) + + }) + + return rowValues + + } + + function createBlobURL(worksheetDataByRows, columns, suffix = {}) { + + var fileData = "" + + worksheetDataByRows.forEach(worksheetRow => { + const rowValues = [] - columns.map(columnName => rowValues.push(rowData[columnName.toLowerCase()] + columns.map(columnName => rowValues.push(worksheetRow[columnName.toLowerCase()] + (!!suffix[columnName] ? suffix[columnName] : ""))) if (!(rowValues.includes("NA") || rowValues.includes("Derived") || rowValues.includes(""))) @@ -108,16 +122,17 @@

Generate text mappings files

const qvTvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[0]] - const qvTvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" }); + const qvTvWorksheetOrderedByRows = columnNamesToLowerCase( + XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" })) var datasetPrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map( - worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell => + worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell => (!!datasetPrefixCell && !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell)) ))) var questionnairePrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map( - worksheetRow => worksheetRow['Questionnaire prefix']).filter(questionnairePrefixCell => + worksheetRow => worksheetRow['questionnaire prefix']).filter(questionnairePrefixCell => (!!questionnairePrefixCell && !(questionnairePrefixCell.includes("NA") || questionnairePrefixCell.includes("Derived") || !questionnairePrefixCell)) ))) @@ -125,9 +140,9 @@

Generate text mappings files

questionnairePrefixes.forEach(questionnairePrefix => { var qvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter( - worksheetRow => worksheetRow['Questionnaire prefix'] == questionnairePrefix), - ['Questionnaire prefix', 'Question Name', 'Dataset prefix', 'Variable Name'], - { 'Questionnaire prefix': '_ccs01' }) + worksheetRow => worksheetRow['questionnaire prefix'] == questionnairePrefix), + ['questionnaire prefix', 'question name', 'dataset prefix', 'variable name'], + { 'questionnaire prefix': '_ccs01' }) addLinkToPage(qvLinkURL, `${questionnairePrefix}_qv.txt`, @@ -139,8 +154,8 @@

Generate text mappings files

datasetPrefixes.forEach(datasetPrefix => { var tvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter( - worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix), - ['Dataset prefix', 'Variable Name', 'Topic ID']) + worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), + ['dataset prefix', 'variable name', 'topic id']) addLinkToPage(tvLinkURL, `${datasetPrefix}_tv.txt`, @@ -152,22 +167,22 @@

Generate text mappings files

simplifiedData = [] qvTvWorksheetOrderedByRows.forEach(worksheetRow => { - if (worksheetRow['Question Name'].includes("$")) { - indexOfDollarSign = worksheetRow['Question Name'].indexOf("$") - worksheetRow['Question Name'] = (worksheetRow['Question Name'].slice(0, indexOfDollarSign)) + if (worksheetRow['question name'].includes("$")) { + indexOfDollarSign = worksheetRow['question name'].indexOf("$") + worksheetRow['question name'] = (worksheetRow['question name'].slice(0, indexOfDollarSign)) } - if (simplifiedData.filter(dataRow => dataRow['Question Name'] == worksheetRow['Question Name'] - && dataRow['Topic ID'] == worksheetRow['Topic ID']).length == 0) + if (simplifiedData.filter(dataRow => dataRow['question name'] == worksheetRow['question name'] + && dataRow['topic id'] == worksheetRow['topic id']).length == 0) simplifiedData.push(worksheetRow) }) questionnairePrefixes.forEach(questionnairePrefix => { var tqLinkURL = createBlobURL(simplifiedData.filter( - dataRow => dataRow['Questionnaire prefix'] == questionnairePrefix), - ['Questionnaire prefix', 'Question Name', 'Topic ID'], - { 'Questionnaire prefix': '_ccs01' }) + dataRow => dataRow['questionnaire prefix'] == questionnairePrefix), + ['questionnaire prefix', 'question name', 'topic id'], + { 'questionnaire prefix': '_ccs01' }) addLinkToPage(tqLinkURL, `${questionnairePrefix}_tq.txt`, @@ -180,20 +195,21 @@

Generate text mappings files

const dvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[1]] - const dvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" }); + const dvWorksheetOrderedByRows = columnNamesToLowerCase( + XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" })) datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows.map( - worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell => + worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell => (!!datasetPrefixCell && !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell)) ))) datasetPrefixes.forEach(datasetPrefix => { var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows.filter( - worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix), ["Dataset prefix", - "Derived Variable Name", - "Dataset prefix", - "Source Variable Name" + worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), ["dataset prefix", + "derived variable name", + "dataset prefix", + "source variable name" ]) addLinkToPage(dvLinkURL, `${datasetPrefix}_dv.txt`, @@ -209,7 +225,6 @@

Generate text mappings files

- \ No newline at end of file