Merge pull request #22 from CLOSER-Cohorts/update-readme
Update readme
ollylucl authored Jul 25, 2024
2 parents 6c4e279 + 49bab87 commit 018180c
Showing 2 changed files with 129 additions and 28 deletions.
88 changes: 87 additions & 1 deletion README.md
@@ -1,2 +1,88 @@
# archivist-utilities
Tools for users of archivist

This is a tool that implements the functionality described at https://ucldata.atlassian.net/wiki/spaces/CLOS/pages/37323492/Using+Txt+Files

The tool is accessible at https://closer-cohorts.github.io/archivist-utilities/

It accepts an Excel file (.xlsx) as input.

The worksheet containing data from which the qv, tv and tq mappings are created MUST be the first worksheet in the file. This worksheet can have any name.

The worksheet containing data from which the dv mappings are created MUST be the second worksheet in the file. This worksheet can have any name.
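
For reference, a minimal sketch of how the two worksheets are read with SheetJS (the library used on the tool's page); `readWorksheets` is an illustrative helper name, not part of the tool itself:

```javascript
// A minimal sketch, assuming SheetJS is available as the global XLSX
// (as it is on the tool's page); readWorksheets is an illustrative helper.
function readWorksheets(arrayBuffer) {
  const wb = XLSX.read(arrayBuffer, { type: "array" });
  const sheetNames = Object.keys(wb.Sheets);

  // First worksheet (any name): source of the qv, tv and tq mappings.
  const qvTvRows = XLSX.utils.sheet_to_json(wb.Sheets[sheetNames[0]], { defval: "" });

  // Second worksheet (any name): source of the dv mappings.
  const dvRows = XLSX.utils.sheet_to_json(wb.Sheets[sheetNames[1]], { defval: "" });

  return { qvTvRows, dvRows };
}
```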

# QV mappings file

The following column headers must be present in the first worksheet in the input Excel file, in order to create the qv text files:

- Questionnaire prefix
- Question name
- Dataset prefix
- Variable name

The headers are not case-sensitive, e.g. a column called 'question name' instead of 'Question name' is acceptable.

A separate qv text file will be created for each unique questionnaire prefix value in the first worksheet in the input Excel file.

For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:

- heaf_17_fup4_qv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column)
- heaf_17_fup5_qv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column)
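
A rough sketch of that behaviour, mirroring the page's approach of lower-casing the column headers and then grouping rows by prefix; `groupByPrefix` is a hypothetical helper name, not part of the tool itself:

```javascript
// Illustrative sketch: lower-case every header, then group rows by a prefix column.
// groupByPrefix is a hypothetical helper, not part of the tool itself.
function groupByPrefix(rows, prefixColumn) {
  const groups = {};
  rows.forEach(row => {
    // Header matching is case-insensitive, so normalise the keys first.
    const normalised = {};
    Object.keys(row).forEach(key => { normalised[key.toLowerCase()] = row[key]; });

    const prefix = normalised[prefixColumn.toLowerCase()];
    if (!prefix) return; // skip rows with no prefix value
    (groups[prefix] = groups[prefix] || []).push(normalised);
  });
  return groups;
}

// e.g. groupByPrefix(rows, "Questionnaire prefix") might give
// { heaf_17_fup4: [...], heaf_17_fup5: [...] } -- one <prefix>_qv.txt per key.
```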

# TV mappings file

The following column headers must be present in the first worksheet in the input Excel file, in order to create the tv text files:

- Dataset prefix
- Variable name
- Topic id

The headers are not case-sensitive, e.g. a column called 'dataset prefix' instead of 'Dataset prefix' is acceptable.

A separate tv text file will be created for each unique dataset prefix value in the input Excel file.

For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:

- heaf_17_fup4_tv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column)
- heaf_17_fup5_tv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column)
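
A small sketch of how the set of dataset prefixes can be collected, mirroring the page's filtering of placeholder cells ('NA', 'Derived' or blank); `uniquePrefixes` is a hypothetical helper name, and the headers are assumed to have been lower-cased as in the sketch above:

```javascript
// Illustrative sketch: collect the unique dataset prefixes, ignoring placeholder
// cells; uniquePrefixes is a hypothetical helper, not part of the tool itself.
function uniquePrefixes(rows, prefixColumn) {
  const values = rows
    .map(row => row[prefixColumn.toLowerCase()])
    .filter(value => !!value
      && !String(value).includes("NA")
      && !String(value).includes("Derived"));
  return [...new Set(values)]; // one tv file is written per entry in this array
}
```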

# TQ mappings file

The following column headers must be present in the first worksheet in the input Excel file, in order to create the tq text files:

- Questionnaire prefix
- Question name
- Topic id

The headers are not case-sensitive, e.g. a column called 'question name' instead of 'Question name' is acceptable.

A separate tq text file will be created for each unique questionnaire prefix value in the input Excel file.

For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:

- heaf_17_fup4_tq.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column)
- heaf_17_fup5_tq.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column)
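
The page's script also collapses repeated question name / topic id pairs before the tq files are written, truncating question names at the first '$'. A rough sketch of that step; `dedupeQuestionTopics` is a hypothetical helper name, and lower-cased headers are assumed:

```javascript
// Illustrative sketch of the tq de-duplication step; dedupeQuestionTopics is a
// hypothetical helper, not part of the tool itself.
function dedupeQuestionTopics(rows) {
  const seen = new Set();
  const deduped = [];
  rows.forEach(row => {
    // Question names are truncated at the first "$".
    const name = String(row["question name"]).split("$")[0];
    const key = `${name}::${row["topic id"]}`;
    if (!seen.has(key)) {
      seen.add(key);
      deduped.push({ ...row, "question name": name });
    }
  });
  return deduped;
}
```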

# DV mappings file

The following column headers must be present in the second worksheet in the input Excel file, in order to create the dv text files:

- Dataset prefix
- Derived variable name
- Source variable name

The headers are not case-sensitive, e.g. a column called 'dataset prefix' instead of 'Dataset prefix' is acceptable.

A separate dv text file will be created for each unique dataset prefix value in the second worksheet in the input Excel file.

For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:

- heaf_17_fup4_dv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column)
- heaf_17_fup5_dv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column)
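
Rows whose mapped columns contain 'NA', 'Derived' or an empty value are skipped when any of the text files are written. A minimal sketch of that check; `rowIsUsable` is a hypothetical helper name, and lower-cased headers are assumed:

```javascript
// Illustrative sketch of the row filter; rowIsUsable is a hypothetical helper,
// not part of the tool itself.
function rowIsUsable(row, columns) {
  const values = columns.map(column => row[column.toLowerCase()]);
  return !(values.includes("NA") || values.includes("Derived") || values.includes(""));
}

// e.g. only rows passing
//   rowIsUsable(row, ["Dataset prefix", "Derived variable name", "Source variable name"])
// end up in the generated <prefix>_dv.txt file.
```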

# Text mapping file generation

To generate the text mapping files for Archivist from an Excel XLSX file, simply:

1. Select the Excel XLSX file from your local computer by pressing the 'Choose file' button.
2. A 'Convert file' button will appear after you have selected an XLSX file. Press this button.
3. Links to the various text files generated from this XLSX file will now be displayed on the screen; clicking a link will download that text file to your local computer.
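
Under the hood, the page builds each text file in memory and exposes it as a download link. A simplified sketch of that pattern using standard browser APIs; `offerDownload` and the newline-joining shown are assumptions for illustration, not confirmed details of the tool:

```javascript
// Simplified sketch of the download-link pattern, using standard browser APIs.
// offerDownload and the "\n" join are illustrative assumptions.
function offerDownload(lines, fileName) {
  const blob = new Blob([lines.join("\n")], { type: "text/plain" });
  const url = URL.createObjectURL(blob);

  const link = document.createElement("a");
  link.href = url;
  link.download = fileName;        // e.g. "heaf_17_fup4_qv.txt"
  link.textContent = fileName;
  document.body.appendChild(link); // clicking the link saves the file locally
}
```
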
69 changes: 42 additions & 27 deletions index.html
@@ -38,10 +38,10 @@ <h1>Generate text mappings files</h1>

}

function createBlobURL(worksheetDataByRows, columns, suffix = {}) {

var fileData = ""
function columnNamesToLowerCase(worksheetDataByRows) {

const rowValues = []

worksheetDataByRows.forEach(worksheetRow => {

// We need to create a new object to hold the row data so we can ensure the
@@ -51,9 +51,23 @@ <h1>Generate text mappings files</h1>

Object.keys(worksheetRow).forEach(fieldName => rowData[fieldName.toLowerCase()] = worksheetRow[fieldName])

rowValues.push(rowData)

})

return rowValues

}

function createBlobURL(worksheetDataByRows, columns, suffix = {}) {

var fileData = ""

worksheetDataByRows.forEach(worksheetRow => {

const rowValues = []

columns.map(columnName => rowValues.push(rowData[columnName.toLowerCase()]
columns.map(columnName => rowValues.push(worksheetRow[columnName.toLowerCase()]
+ (!!suffix[columnName] ? suffix[columnName] : "")))

if (!(rowValues.includes("NA") || rowValues.includes("Derived") || rowValues.includes("")))
@@ -108,26 +122,27 @@ <h1>Generate text mappings files</h1>

const qvTvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[0]]

const qvTvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" });
const qvTvWorksheetOrderedByRows = columnNamesToLowerCase(
XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" }))

var datasetPrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map(
worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell =>
worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell =>
(!!datasetPrefixCell && !(datasetPrefixCell.includes("NA")
|| datasetPrefixCell.includes("Derived") || !datasetPrefixCell))
)))

var questionnairePrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map(
worksheetRow => worksheetRow['Questionnaire prefix']).filter(questionnairePrefixCell =>
worksheetRow => worksheetRow['questionnaire prefix']).filter(questionnairePrefixCell =>
(!!questionnairePrefixCell && !(questionnairePrefixCell.includes("NA")
|| questionnairePrefixCell.includes("Derived") || !questionnairePrefixCell))
)))

questionnairePrefixes.forEach(questionnairePrefix => {

var qvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter(
worksheetRow => worksheetRow['Questionnaire prefix'] == questionnairePrefix),
['Questionnaire prefix', 'Question Name', 'Dataset prefix', 'Variable Name'],
{ 'Questionnaire prefix': '_ccs01' })
worksheetRow => worksheetRow['questionnaire prefix'] == questionnairePrefix),
['questionnaire prefix', 'question name', 'dataset prefix', 'variable name'],
{ 'questionnaire prefix': '_ccs01' })

addLinkToPage(qvLinkURL,
`${questionnairePrefix}_qv.txt`,
@@ -139,8 +154,8 @@ <h1>Generate text mappings files</h1>
datasetPrefixes.forEach(datasetPrefix => {

var tvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter(
worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix),
['Dataset prefix', 'Variable Name', 'Topic ID'])
worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix),
['dataset prefix', 'variable name', 'topic id'])

addLinkToPage(tvLinkURL,
`${datasetPrefix}_tv.txt`,
@@ -152,22 +167,22 @@ <h1>Generate text mappings files</h1>
simplifiedData = []

qvTvWorksheetOrderedByRows.forEach(worksheetRow => {
if (worksheetRow['Question Name'].includes("$")) {
indexOfDollarSign = worksheetRow['Question Name'].indexOf("$")
worksheetRow['Question Name'] = (worksheetRow['Question Name'].slice(0, indexOfDollarSign))
if (worksheetRow['question name'].includes("$")) {
indexOfDollarSign = worksheetRow['question name'].indexOf("$")
worksheetRow['question name'] = (worksheetRow['question name'].slice(0, indexOfDollarSign))

}
if (simplifiedData.filter(dataRow => dataRow['Question Name'] == worksheetRow['Question Name']
&& dataRow['Topic ID'] == worksheetRow['Topic ID']).length == 0)
if (simplifiedData.filter(dataRow => dataRow['question name'] == worksheetRow['question name']
&& dataRow['topic id'] == worksheetRow['topic id']).length == 0)
simplifiedData.push(worksheetRow)
})

questionnairePrefixes.forEach(questionnairePrefix => {

var tqLinkURL = createBlobURL(simplifiedData.filter(
dataRow => dataRow['Questionnaire prefix'] == questionnairePrefix),
['Questionnaire prefix', 'Question Name', 'Topic ID'],
{ 'Questionnaire prefix': '_ccs01' })
dataRow => dataRow['questionnaire prefix'] == questionnairePrefix),
['questionnaire prefix', 'question name', 'topic id'],
{ 'questionnaire prefix': '_ccs01' })

addLinkToPage(tqLinkURL,
`${questionnairePrefix}_tq.txt`,
@@ -180,20 +195,21 @@ <h1>Generate text mappings files</h1>

const dvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[1]]

const dvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" });
const dvWorksheetOrderedByRows = columnNamesToLowerCase(
XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" }))

datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows.map(
worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell =>
worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell =>
(!!datasetPrefixCell
&& !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell))
)))

datasetPrefixes.forEach(datasetPrefix => {
var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows.filter(
worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix), ["Dataset prefix",
"Derived Variable Name",
"Dataset prefix",
"Source Variable Name"
worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), ["dataset prefix",
"derived variable name",
"dataset prefix",
"source variable name"
])
addLinkToPage(dvLinkURL,
`${datasetPrefix}_dv.txt`,
@@ -209,7 +225,6 @@ <h1>Generate text mappings files</h1>

</script>


</body>

</html>
