diff --git a/README.md b/README.md
index 8d9c3ea..e6a6f5f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,88 @@
# archivist-utilities
-Tools for users of archivist
+
+This is a tool that implements the functionality described at https://ucldata.atlassian.net/wiki/spaces/CLOS/pages/37323492/Using+Txt+Files
+
+The tool is accessible at https://closer-cohorts.github.io/archivist-utilities/
+
+It accepts as input an Excel file (.xlsx).
+
+The worksheet containing data from which the qv, tv and tq mappings are created MUST be the first worksheet in the file. This worksheet can have any name.
+
+The worksheet containing data from which the dv mappings are created MUST be the second worksheet in the file. This worksheet can have any name.
+
+# QV mappings file
+
+The following column headers must be present in the first worksheet in the input Excel file, in order to create the qv text files:
+
+ - Questionnaire prefix
+ - Question name
+ - Dataset prefix
+ - Variable name
+
+The headers are not case sensitive, i.e. a column called 'question name' instead of 'Question Name' is acceptable.
+
+A separate qv text file will be created for each unique questionnaire prefix value in the first worksheet in the input Excel file.
+
+For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:
+
+ - heaf_17_fup4_qv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column)
+ - heaf_17_fup5_qv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column)
+
+# TV mappings file
+
+The following column headers must be present in the first worksheet in the input Excel file, in order to create the tv text files:
+
+ - Dataset prefix
+ - Variable name
+ - Topic id
+
+The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable.
+
+A separate tv text file will be created for each unique dataset prefix value in the input Excel file.
+
+For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:
+
+ - heaf_17_fup4_tv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column)
+ - heaf_17_fup5_tv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column)
+
+# TQ mappings file
+
+The following column headers must be present in the first worksheet in the input Excel file, in order to create the tq text files:
+
+ - Questionnaire prefix
+ - Question name
+ - Topic id
+
+The headers are not case sensitive, i.e. a column called 'question name' instead of 'Question Name' is acceptable.
+
+A separate tq text file will be created for each unique questionnaire prefix value in the input Excel file.
+
+For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Questionnaire prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:
+
+ - heaf_17_fup4_tq.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Questionnaire prefix' column)
+ - heaf_17_fup5_tq.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Questionnaire prefix' column)
+
+# DV mappings file
+
+The following column headers must be present in the second worksheet in the input Excel file, in order to create the dv text files:
+
+ - Dataset prefix
+ - Derived variable name
+ - Source variable name
+
+The headers are not case sensitive, i.e. a column called 'dataset prefix' instead of 'Dataset Prefix' is acceptable.
+
+A separate dv text file will be created for each unique dataset prefix value in the second worksheet in the input Excel file.
+
+For example, if some rows in the input worksheet had the value 'heaf_17_fup4' for the 'Dataset prefix' column, and some other rows had the value 'heaf_17_fup5' for that column, this would result in 2 text files being generated:
+
+ - heaf_17_fup4_dv.txt (containing all the rows which had the value 'heaf_17_fup4' in the 'Dataset prefix' column)
+ - heaf_17_fup5_dv.txt (containing all the rows which had the value 'heaf_17_fup5' in the 'Dataset prefix' column)
+
+# Text mapping file generation
+
+To generate the text mapping files for Archivist from an Excel XLSX file, simply:
+
+1. Select the Excel XLSX file from your local computer by pressing the 'Choose file' button
+2. A 'Convert file' button will appear after you have selected an XLSX file. Press this button.
+3. Links to the various text files generated from this XLSX file will now be displayed on the screen, and clicking on these will download the text files to your local computer.
\ No newline at end of file
diff --git a/index.html b/index.html
index bc98f14..39551d8 100644
--- a/index.html
+++ b/index.html
@@ -38,10 +38,10 @@
Generate text mappings files
}
- function createBlobURL(worksheetDataByRows, columns, suffix = {}) {
-
- var fileData = ""
+ function columnNamesToLowerCase(worksheetDataByRows) {
+ const rowValues = []
+
worksheetDataByRows.forEach(worksheetRow => {
// We need to create a new object to hold the row data so we can ensure the
@@ -51,9 +51,23 @@ Generate text mappings files
Object.keys(worksheetRow).forEach(fieldName => rowData[fieldName.toLowerCase()] = worksheetRow[fieldName])
+ rowValues.push(rowData)
+
+ })
+
+ return rowValues
+
+ }
+
+ function createBlobURL(worksheetDataByRows, columns, suffix = {}) {
+
+ var fileData = ""
+
+ worksheetDataByRows.forEach(worksheetRow => {
+
const rowValues = []
- columns.map(columnName => rowValues.push(rowData[columnName.toLowerCase()]
+ columns.map(columnName => rowValues.push(worksheetRow[columnName.toLowerCase()]
+ (!!suffix[columnName] ? suffix[columnName] : "")))
if (!(rowValues.includes("NA") || rowValues.includes("Derived") || rowValues.includes("")))
@@ -108,16 +122,17 @@ Generate text mappings files
const qvTvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[0]]
- const qvTvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" });
+ const qvTvWorksheetOrderedByRows = columnNamesToLowerCase(
+ XLSX.utils.sheet_to_json(qvTvWorksheet, { defval: "" }))
var datasetPrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map(
- worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell =>
+ worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell =>
(!!datasetPrefixCell && !(datasetPrefixCell.includes("NA")
|| datasetPrefixCell.includes("Derived") || !datasetPrefixCell))
)))
var questionnairePrefixes = (getUniqueArrayValues(qvTvWorksheetOrderedByRows.map(
- worksheetRow => worksheetRow['Questionnaire prefix']).filter(questionnairePrefixCell =>
+ worksheetRow => worksheetRow['questionnaire prefix']).filter(questionnairePrefixCell =>
(!!questionnairePrefixCell && !(questionnairePrefixCell.includes("NA")
|| questionnairePrefixCell.includes("Derived") || !questionnairePrefixCell))
)))
@@ -125,9 +140,9 @@ Generate text mappings files
questionnairePrefixes.forEach(questionnairePrefix => {
var qvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter(
- worksheetRow => worksheetRow['Questionnaire prefix'] == questionnairePrefix),
- ['Questionnaire prefix', 'Question Name', 'Dataset prefix', 'Variable Name'],
- { 'Questionnaire prefix': '_ccs01' })
+ worksheetRow => worksheetRow['questionnaire prefix'] == questionnairePrefix),
+ ['questionnaire prefix', 'question name', 'dataset prefix', 'variable name'],
+ { 'questionnaire prefix': '_ccs01' })
addLinkToPage(qvLinkURL,
`${questionnairePrefix}_qv.txt`,
@@ -139,8 +154,8 @@ Generate text mappings files
datasetPrefixes.forEach(datasetPrefix => {
var tvLinkURL = createBlobURL(qvTvWorksheetOrderedByRows.filter(
- worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix),
- ['Dataset prefix', 'Variable Name', 'Topic ID'])
+ worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix),
+ ['dataset prefix', 'variable name', 'topic id'])
addLinkToPage(tvLinkURL,
`${datasetPrefix}_tv.txt`,
@@ -152,22 +167,22 @@ Generate text mappings files
simplifiedData = []
qvTvWorksheetOrderedByRows.forEach(worksheetRow => {
- if (worksheetRow['Question Name'].includes("$")) {
- indexOfDollarSign = worksheetRow['Question Name'].indexOf("$")
- worksheetRow['Question Name'] = (worksheetRow['Question Name'].slice(0, indexOfDollarSign))
+ if (worksheetRow['question name'].includes("$")) {
+ indexOfDollarSign = worksheetRow['question name'].indexOf("$")
+ worksheetRow['question name'] = (worksheetRow['question name'].slice(0, indexOfDollarSign))
}
- if (simplifiedData.filter(dataRow => dataRow['Question Name'] == worksheetRow['Question Name']
- && dataRow['Topic ID'] == worksheetRow['Topic ID']).length == 0)
+ if (simplifiedData.filter(dataRow => dataRow['question name'] == worksheetRow['question name']
+ && dataRow['topic id'] == worksheetRow['topic id']).length == 0)
simplifiedData.push(worksheetRow)
})
questionnairePrefixes.forEach(questionnairePrefix => {
var tqLinkURL = createBlobURL(simplifiedData.filter(
- dataRow => dataRow['Questionnaire prefix'] == questionnairePrefix),
- ['Questionnaire prefix', 'Question Name', 'Topic ID'],
- { 'Questionnaire prefix': '_ccs01' })
+ dataRow => dataRow['questionnaire prefix'] == questionnairePrefix),
+ ['questionnaire prefix', 'question name', 'topic id'],
+ { 'questionnaire prefix': '_ccs01' })
addLinkToPage(tqLinkURL,
`${questionnairePrefix}_tq.txt`,
@@ -180,20 +195,21 @@ Generate text mappings files
const dvWorksheet = wb['Sheets'][Object.keys(wb['Sheets'])[1]]
- const dvWorksheetOrderedByRows = XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" });
+ const dvWorksheetOrderedByRows = columnNamesToLowerCase(
+ XLSX.utils.sheet_to_json(dvWorksheet, { defval: "" }))
datasetPrefixes = (getUniqueArrayValues(dvWorksheetOrderedByRows.map(
- worksheetRow => worksheetRow['Dataset prefix']).filter(datasetPrefixCell =>
+ worksheetRow => worksheetRow['dataset prefix']).filter(datasetPrefixCell =>
(!!datasetPrefixCell
&& !(datasetPrefixCell.includes("NA") || datasetPrefixCell.includes("Derived") || !datasetPrefixCell))
)))
datasetPrefixes.forEach(datasetPrefix => {
var dvLinkURL = createBlobURL(dvWorksheetOrderedByRows.filter(
- worksheetRow => worksheetRow['Dataset prefix'] == datasetPrefix), ["Dataset prefix",
- "Derived Variable Name",
- "Dataset prefix",
- "Source Variable Name"
+ worksheetRow => worksheetRow['dataset prefix'] == datasetPrefix), ["dataset prefix",
+ "derived variable name",
+ "dataset prefix",
+ "source variable name"
])
addLinkToPage(dvLinkURL,
`${datasetPrefix}_dv.txt`,
@@ -209,7 +225,6 @@ Generate text mappings files
-