From 4b9dd06a49119545cf3f0db13d45dbf0f9766159 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 21 Jun 2023 22:13:50 -0400 Subject: [PATCH 01/21] cast all columns in the UNPIVOT statement to varchar(256) to prevent conflicting column types from causing the UNPIVOT to fail --- Code/Sql/spTranslateTable.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index cbdd98e..a02ce65 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -29,9 +29,9 @@ AS FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema - -- create comma delimited list of columns to be selected from the source table + -- create comma delimited list of columns to be selected from the source table, including casting to varchar DECLARE @SourceSelectColNames varchar(8000) - SELECT @SourceSelectColNames=STRING_AGG('[' + COLUMN_NAME + ']', ', ') + SELECT @SourceSelectColNames=STRING_AGG('CAST ([' + COLUMN_NAME + '] AS varchar(256)) AS ' + COLUMN_NAME, ', ') FROM #tmpColNames -- PRINT @SourceSelectColNames From 6c9a7092599bd4b42b5ecddd50737b5b6f02406b Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Thu, 22 Jun 2023 17:15:39 -0400 Subject: [PATCH 02/21] fixes STRING_AGG varchar overflow --- Code/Sql/spTranslateTable.sql | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index a02ce65..6422838 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -1,7 +1,6 @@ /* Stored procedure to translate variable responses in an NHANES questionnaire table */ - CREATE PROC spTranslateTable @SourceTableSchema varchar(8000), @SourceTableName varchar(128), @@ -28,17 +27,31 @@ AS INTO #tmpColNames FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema + + -- figure out whether this table has a SEQN or SAMPLEID primary key + DECLARE @pkColName varchar(256) + DECLARE @flag int + SELECT @flag = COUNT(1) FROM #tmpColNames WHERE COLUMN_NAME='SEQN' + + IF @flag = 1 + SET @pkColName = 'SEQN' + ELSE + SET @pkColName = 'SAMPLEID' + + PRINT @pkColName + + -- TODO: go through the rest of the SPROC and replace SEQN constants with @pkColName -- create comma delimited list of columns to be selected from the source table, including casting to varchar - DECLARE @SourceSelectColNames varchar(8000) - SELECT @SourceSelectColNames=STRING_AGG('CAST ([' + COLUMN_NAME + '] AS varchar(256)) AS ' + COLUMN_NAME, ', ') + DECLARE @SourceSelectColNames varchar(MAX) + SELECT @SourceSelectColNames = STRING_AGG(CAST('CAST([' + COLUMN_NAME + '] AS varchar(MAX)) AS [' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') FROM #tmpColNames -- PRINT @SourceSelectColNames -- create comma delimited list of columns to be unpivoted - DECLARE @UnpivotColNames varchar(8000) - SELECT @UnpivotColNames=STRING_AGG('[' + COLUMN_NAME + ']', ', ') + DECLARE @UnpivotColNames varchar(MAX) + SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') FROM #tmpColNames WHERE COLUMN_NAME != 'SEQN' @@ -49,7 +62,7 @@ AS -- PRINT @UnpivotColNames -- assemble dynamic SQL to unpivot the original table - DECLARE @unpivotStmt varchar(8000) + DECLARE @unpivotStmt varchar(MAX) SET @unpivotStmt = ' SELECT SEQN, Variable, Response INTO ' + @UnpivotTempTableName + ' @@ -70,7 +83,7 @@ AS -- assemble SQL to join the unpivoted table to the variable codebook -- to decode the responses - DECLARE @TranslateStmt varchar(8000) + DECLARE @TranslateStmt varchar(MAX) SET @TranslateStmt = ' SELECT T.SEQN, @@ -89,7 +102,7 @@ AS EXEC (@TranslateStmt) -- assemble SQL to pivot the translated table back into the original schema - DECLARE @PivotStmt varchar(8000) + DECLARE @PivotStmt varchar(MAX) SET @PivotStmt = ' SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ( SELECT From 1437bbc4a56f94496c57e9912f8aae0c082f30a9 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Thu, 22 Jun 2023 21:19:21 -0400 Subject: [PATCH 03/21] Accommodate NHANES tables being translated by Code/Sql/spTranslateTable.sql in the Raw schema where the data is from pooled samples and the primary key is SAMPLEID rather than SEQN. --- Code/Sql/spTranslateTable.sql | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 6422838..f16bd6d 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -1,11 +1,11 @@ - /* - Stored procedure to translate variable responses in an NHANES questionnaire table - */ +/* + Stored procedure to translate variable responses in an NHANES questionnaire table +*/ CREATE PROC spTranslateTable - @SourceTableSchema varchar(8000), - @SourceTableName varchar(128), - @DestinationTableSchema varchar(8000), - @DestinationTableName varchar(128) + @SourceTableSchema varchar(MAX), + @SourceTableName varchar(MAX), + @DestinationTableSchema varchar(MAX), + @DestinationTableName varchar(MAX) AS -- drop the destination table if it exists @@ -55,6 +55,7 @@ AS FROM #tmpColNames WHERE COLUMN_NAME != 'SEQN' + AND COLUMN_NAME != 'SAMPLEID' AND COLUMN_NAME != 'DownloadUrl' AND COLUMN_NAME != 'Questionnaire' AND COLUMN_NAME != 'Description' @@ -64,7 +65,7 @@ AS -- assemble dynamic SQL to unpivot the original table DECLARE @unpivotStmt varchar(MAX) SET @unpivotStmt = ' - SELECT SEQN, Variable, Response + SELECT ' + @pkColName + ', Variable, Response INTO ' + @UnpivotTempTableName + ' FROM ( SELECT @@ -86,7 +87,7 @@ AS DECLARE @TranslateStmt varchar(MAX) SET @TranslateStmt = ' SELECT - T.SEQN, + T.' + @pkColName + ', T.Variable, COALESCE(CAST(V.ValueDescription AS VARCHAR), CAST(T.Response AS VARCHAR)) AS ValueDescription INTO @@ -106,7 +107,7 @@ AS SET @PivotStmt = ' SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ( SELECT - SEQN, + ' + @pkColName + ', Variable, ValueDescription FROM ' + @TranslatedTempTableName + ' @@ -125,8 +126,8 @@ AS -- insert rows where all of the variable responses were NULL in the original data DECLARE @InsertNullStmt varchar(8000) SET @InsertNullStmt = ' - INSERT INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' (SEQN) - SELECT A.SEQN FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' A LEFT OUTER JOIN ' + @DestinationTableSchema + '.' + @DestinationTableName + ' T ON A.SEQN = T.SEQN WHERE T.SEQN IS NULL + INSERT INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' (' + @pkColName + ') + SELECT A.' + @pkColName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' A LEFT OUTER JOIN ' + @DestinationTableSchema + '.' + @DestinationTableName + ' T ON A.' + @pkColName + ' = T.' + @pkColName + ' WHERE T.' + @pkColName + ' IS NULL ' -- PRINT @InsertNullStmt From 6dc2b5707e15d36dae80878b49b41f2f9c034332 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Thu, 22 Jun 2023 21:35:50 -0400 Subject: [PATCH 04/21] add debugging scripts --- .../RerunTableTranslateOnMissingTables.sql | 32 +++++++++++++++++++ Code/Sql/RunTableTranslateOnSingleTable | 21 ++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 Code/Sql/RerunTableTranslateOnMissingTables.sql create mode 100644 Code/Sql/RunTableTranslateOnSingleTable diff --git a/Code/Sql/RerunTableTranslateOnMissingTables.sql b/Code/Sql/RerunTableTranslateOnMissingTables.sql new file mode 100644 index 0000000..715ab40 --- /dev/null +++ b/Code/Sql/RerunTableTranslateOnMissingTables.sql @@ -0,0 +1,32 @@ +-- loop over all missing tables in the last release + SELECT TABLE_NAME + INTO #tmpRawTables + FROM INFORMATION_SCHEMA.TABLES + WHERE TABLE_SCHEMA = 'Raw' + + SELECT TABLE_NAME + INTO #tmpTranslatedTables + FROM INFORMATION_SCHEMA.TABLES + WHERE TABLE_SCHEMA = 'Translated' + +DROP TABLE IF EXISTS #tmpDebugTables +SELECT R.TABLE_NAME INTO #tmpDebugTables FROM #tmpRawTables R LEFT OUTER JOIN #tmpTranslatedTables T ON R.TABLE_NAME = T.TABLE_NAME WHERE T.TABLE_NAME IS NULL ORDER BY R.TABLE_NAME + +DECLARE cTableNames CURSOR FOR SELECT * FROM #tmpDebugTables ORDER BY TABLE_NAME +DECLARE @currTableName varchar(256) +OPEN cTableNames + +FETCH NEXT FROM cTableNames INTO @currTableName +WHILE @@FETCH_STATUS = 0 +BEGIN + PRINT @currTableName + EXECUTE dbo.spTranslateTable + 'Raw' + ,@currTableName + ,'Translated' + ,@currTableName + FETCH NEXT FROM cTableNames INTO @currTableName +END + +CLOSE cTableNames +DEALLOCATE cTableNames \ No newline at end of file diff --git a/Code/Sql/RunTableTranslateOnSingleTable b/Code/Sql/RunTableTranslateOnSingleTable new file mode 100644 index 0000000..cb794fa --- /dev/null +++ b/Code/Sql/RunTableTranslateOnSingleTable @@ -0,0 +1,21 @@ +-- Manually run + +DECLARE @RC int +DECLARE @SourceTableSchema varchar(8000) +DECLARE @SourceTableName varchar(128) +DECLARE @DestinationTableSchema varchar(8000) +DECLARE @DestinationTableName varchar(128) + +-- TODO: Set parameter values here. +-- CAFE_G +SET @SourceTableSchema = 'Raw' +SET @SourceTableName = 'AUX_I' +SET @DestinationTableSchema = 'Translated' + +EXECUTE @RC = [dbo].[spTranslateTable] + @SourceTableSchema + ,@SourceTableName + ,@DestinationTableSchema + ,@DestinationTableName = @SourceTableName + +EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName) \ No newline at end of file From c282b344f8a78deb7b007308ea231dc22e7c7590 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Thu, 22 Jun 2023 21:40:36 -0400 Subject: [PATCH 05/21] fixed comment --- Code/Sql/RunTableTranslateOnSingleTable.sql | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 Code/Sql/RunTableTranslateOnSingleTable.sql diff --git a/Code/Sql/RunTableTranslateOnSingleTable.sql b/Code/Sql/RunTableTranslateOnSingleTable.sql new file mode 100644 index 0000000..1612f49 --- /dev/null +++ b/Code/Sql/RunTableTranslateOnSingleTable.sql @@ -0,0 +1,21 @@ +-- Manually run spTranslateTable + +DECLARE @RC int +DECLARE @SourceTableSchema varchar(8000) +DECLARE @SourceTableName varchar(128) +DECLARE @DestinationTableSchema varchar(8000) +DECLARE @DestinationTableName varchar(128) + +-- TODO: Set parameter values here. +-- CAFE_G +SET @SourceTableSchema = 'Raw' +SET @SourceTableName = 'AUX_I' +SET @DestinationTableSchema = 'Translated' + +EXECUTE @RC = [dbo].[spTranslateTable] + @SourceTableSchema + ,@SourceTableName + ,@DestinationTableSchema + ,@DestinationTableName = @SourceTableName + +EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName) \ No newline at end of file From 25d38b44c4b41fe4ca511003e308249ce497c318 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Thu, 22 Jun 2023 21:51:41 -0400 Subject: [PATCH 06/21] old delete --- Code/Sql/RunTableTranslateOnSingleTable | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 Code/Sql/RunTableTranslateOnSingleTable diff --git a/Code/Sql/RunTableTranslateOnSingleTable b/Code/Sql/RunTableTranslateOnSingleTable deleted file mode 100644 index cb794fa..0000000 --- a/Code/Sql/RunTableTranslateOnSingleTable +++ /dev/null @@ -1,21 +0,0 @@ --- Manually run - -DECLARE @RC int -DECLARE @SourceTableSchema varchar(8000) -DECLARE @SourceTableName varchar(128) -DECLARE @DestinationTableSchema varchar(8000) -DECLARE @DestinationTableName varchar(128) - --- TODO: Set parameter values here. --- CAFE_G -SET @SourceTableSchema = 'Raw' -SET @SourceTableName = 'AUX_I' -SET @DestinationTableSchema = 'Translated' - -EXECUTE @RC = [dbo].[spTranslateTable] - @SourceTableSchema - ,@SourceTableName - ,@DestinationTableSchema - ,@DestinationTableName = @SourceTableName - -EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName) \ No newline at end of file From 692a94ac62376127452418ba1a5441862e54ceeb Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Thu, 22 Jun 2023 22:01:00 -0400 Subject: [PATCH 07/21] remove debugging statements --- Code/Sql/spTranslateTable.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index f16bd6d..f3b1572 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -38,7 +38,7 @@ AS ELSE SET @pkColName = 'SAMPLEID' - PRINT @pkColName + -- PRINT @pkColName -- TODO: go through the rest of the SPROC and replace SEQN constants with @pkColName From 5f4b4e02e5c6d9d006eea5ec203aaf3b9db26981 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Fri, 23 Jun 2023 15:40:26 -0400 Subject: [PATCH 08/21] allows passthrough of untranslate-able lookup tables and identified various other primary keys that are used instead of SEQN --- .../RerunTableTranslateOnMissingTables.sql | 10 ++++ Code/Sql/spTranslateTable.sql | 52 ++++++++++++++----- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/Code/Sql/RerunTableTranslateOnMissingTables.sql b/Code/Sql/RerunTableTranslateOnMissingTables.sql index 715ab40..7ec712b 100644 --- a/Code/Sql/RerunTableTranslateOnMissingTables.sql +++ b/Code/Sql/RerunTableTranslateOnMissingTables.sql @@ -1,3 +1,11 @@ +/* + This is a debugging tool to help identify tables that fail to translate and re-run + their translations once a bugfix has been proposed. +*/ + +DROP TABLE IF EXISTS #tmpRawTables +DROP TABLE IF EXISTS #tmpTranslatedTables + -- loop over all missing tables in the last release SELECT TABLE_NAME INTO #tmpRawTables @@ -12,6 +20,8 @@ DROP TABLE IF EXISTS #tmpDebugTables SELECT R.TABLE_NAME INTO #tmpDebugTables FROM #tmpRawTables R LEFT OUTER JOIN #tmpTranslatedTables T ON R.TABLE_NAME = T.TABLE_NAME WHERE T.TABLE_NAME IS NULL ORDER BY R.TABLE_NAME +SELECT * FROM #tmpDebugTables + DECLARE cTableNames CURSOR FOR SELECT * FROM #tmpDebugTables ORDER BY TABLE_NAME DECLARE @currTableName varchar(256) OPEN cTableNames diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index f3b1572..7c44bc8 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -8,6 +8,26 @@ CREATE PROC spTranslateTable @DestinationTableName varchar(MAX) AS + -- check that the variable codebook actually has data for this table + DECLARE @variableTranslationCount INT + SELECT @variableTranslationCount = COUNT(*) FROM Metadata.VariableCodebook C WHERE C.TableName = @SourceTableName + + -- if there are no translatable variables for this table, just copy it over + IF @variableTranslationCount = 0 + BEGIN + + PRINT 'There are no variables available to translate for ' + @SourceTableSchema + '.' + @SourceTableName + '. Copying table as-is.' + DECLARE @CopyTableStatement varchar(MAX) + SET @CopyTableStatement = ' + DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName + ' + SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' + CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName + + EXEC(@CopyTableStatement) + -- we can't do any of the steps below, so just return + RETURN + END + -- drop the destination table if it exists DECLARE @DropDestinationStmt varchar(8000) SET @DropDestinationStmt = 'DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName @@ -28,19 +48,20 @@ AS FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema - -- figure out whether this table has a SEQN or SAMPLEID primary key + -- figure out which primary key column this table has + DROP TABLE IF EXISTS #tmpPkColNames + CREATE TABLE #tmpPkColNames (ColumnName varchar(8000), Priority int) + + INSERT INTO #tmpPkColNames VALUES ('SEQN', 1) + INSERT INTO #tmpPkColNames VALUES ('SAMPLEID', 2) + INSERT INTO #tmpPkColNames VALUES ('DRXFDCD', 3) + INSERT INTO #tmpPkColNames VALUES ('DRXMC', 4) + INSERT INTO #tmpPkColNames VALUES ('POOLID', 4) + DECLARE @pkColName varchar(256) - DECLARE @flag int - SELECT @flag = COUNT(1) FROM #tmpColNames WHERE COLUMN_NAME='SEQN' + SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority - IF @flag = 1 - SET @pkColName = 'SEQN' - ELSE - SET @pkColName = 'SAMPLEID' - - -- PRINT @pkColName - - -- TODO: go through the rest of the SPROC and replace SEQN constants with @pkColName + PRINT @pkColName -- create comma delimited list of columns to be selected from the source table, including casting to varchar DECLARE @SourceSelectColNames varchar(MAX) @@ -49,13 +70,20 @@ AS -- PRINT @SourceSelectColNames - -- create comma delimited list of columns to be unpivoted + -- create comma delimited list of columns to be unpivoted, excluding primary key columns and old metadata DECLARE @UnpivotColNames varchar(MAX) SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') FROM #tmpColNames WHERE + -- primary key column names + -- TODO: copy them out of the #tmpPkColNames so we only need to update constants in one place COLUMN_NAME != 'SEQN' AND COLUMN_NAME != 'SAMPLEID' + AND COLUMN_NAME != 'DRXFDCD' + AND COLUMN_NAME != 'DRXMC' + AND COLUMN_NAME != 'POOLID' + -- metadata column names + -- TODO: remove these and test AND COLUMN_NAME != 'DownloadUrl' AND COLUMN_NAME != 'Questionnaire' AND COLUMN_NAME != 'Description' From 720274606c63957e4e16bc9dc6e43c8719f7638b Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Tue, 27 Jun 2023 17:12:42 -0400 Subject: [PATCH 09/21] preserves INT column types on additional column names that appear as primary keys: SAMPLEID, DRXFDCD, DRXMC, POOLID --- Code/R/download.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Code/R/download.R b/Code/R/download.R index 925130b..bc7ce38 100644 --- a/Code/R/download.R +++ b/Code/R/download.R @@ -318,8 +318,12 @@ for (i in i:length(dataTypes)) { # change DOUBLE to float createTableQuery = gsub(createTableQuery, pattern = "\" DOUBLE", replace = "\" float", fixed = TRUE) - # we know that SEQN should always be an INT + # we know that SEQN and other primary keys should always be an INT createTableQuery = gsub(createTableQuery, pattern = "\"SEQN\" float", replace = "\"SEQN\" INT", fixed = TRUE) # nolint + createTableQuery = gsub(createTableQuery, pattern = "\"SAMPLEID\" float", replace = "\"SAMPLEID\" INT", fixed = TRUE) # nolint + createTableQuery = gsub(createTableQuery, pattern = "\"DRXFDCD\" float", replace = "\"DRXFDCD\" INT", fixed = TRUE) # nolint + createTableQuery = gsub(createTableQuery, pattern = "\"DRXMC\" float", replace = "\"DRXMC\" INT", fixed = TRUE) # nolint + createTableQuery = gsub(createTableQuery, pattern = "\"POOLID\" float", replace = "\"POOLID\" INT", fixed = TRUE) # nolint # remove double quotes, which interferes with the schema specification createTableQuery = gsub(createTableQuery, pattern = '"', replace = "", fixed = TRUE) From 6d0e82302e582c823ec6c64340853897194ad267 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Tue, 27 Jun 2023 17:14:29 -0400 Subject: [PATCH 10/21] Multiple changes to address #51, attempt to only translate categorical variables and "pass-through" the variables whose ValueDescription matches string 'Range of Values', indicating numeric type. --- Code/Sql/spTranslateTable.sql | 105 ++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 7c44bc8..9f7b943 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -8,6 +8,7 @@ CREATE PROC spTranslateTable @DestinationTableName varchar(MAX) AS + -- check that the variable codebook actually has data for this table DECLARE @variableTranslationCount INT SELECT @variableTranslationCount = COUNT(*) FROM Metadata.VariableCodebook C WHERE C.TableName = @SourceTableName @@ -22,8 +23,8 @@ AS DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName + ' SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName - EXEC(@CopyTableStatement) + -- we can't do any of the steps below, so just return RETURN END @@ -43,50 +44,74 @@ AS -- get all column names of the source table from the information schema DROP TABLE IF EXISTS #tmpColNames - SELECT COLUMN_NAME + SELECT COLUMN_NAME, ORDINAL_POSITION INTO #tmpColNames FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema + WHERE + TABLE_NAME = @SourceTableName + AND TABLE_SCHEMA = @SourceTableSchema + + -- SELECT * FROM #tmpColNames - -- figure out which primary key column this table has + -- enumerate possible primary key column names DROP TABLE IF EXISTS #tmpPkColNames CREATE TABLE #tmpPkColNames (ColumnName varchar(8000), Priority int) - INSERT INTO #tmpPkColNames VALUES ('SEQN', 1) INSERT INTO #tmpPkColNames VALUES ('SAMPLEID', 2) INSERT INTO #tmpPkColNames VALUES ('DRXFDCD', 3) INSERT INTO #tmpPkColNames VALUES ('DRXMC', 4) - INSERT INTO #tmpPkColNames VALUES ('POOLID', 4) + INSERT INTO #tmpPkColNames VALUES ('POOLID', 5) + + -- Figure out which columns should remain numeric and not get translated + DROP TABLE IF EXISTS #tmpNumericColumns + + SELECT C.COLUMN_NAME, ORDINAL_POSITION + INTO #tmpNumericColumns + FROM + #tmpColNames C + INNER JOIN Metadata.VariableCodebook V ON + V.TableName = @SourceTableName + AND C.COLUMN_NAME = V.Variable + AND V.ValueDescription = 'Range of Values' + WHERE C.COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames) + GROUP BY C.COLUMN_NAME, ORDINAL_POSITION + + -- SELECT * FROM #tmpNumericColumns + + -- remove the numeric column names from the table that contains all columns to be translated + DELETE FROM #tmpColNames WHERE COLUMN_NAME IN (SELECT COLUMN_NAME FROM #tmpNumericColumns) + + -- SELECT * FROM #tmpColNames + + -- create comma delimited list of the numeric columns, will be used later to retrieve them from the source table + DECLARE @NumericSelectColNames varchar(MAX) + SELECT + @NumericSelectColNames = STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION) + FROM #tmpNumericColumns - DECLARE @pkColName varchar(256) + -- figure out which primary key column this table has + DECLARE @pkColName varchar(MAX) SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority - PRINT @pkColName + -- PRINT @pkColName -- create comma delimited list of columns to be selected from the source table, including casting to varchar DECLARE @SourceSelectColNames varchar(MAX) - SELECT @SourceSelectColNames = STRING_AGG(CAST('CAST([' + COLUMN_NAME + '] AS varchar(MAX)) AS [' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') + SELECT + @SourceSelectColNames = STRING_AGG(CAST('CAST([' + COLUMN_NAME + '] AS varchar(MAX)) AS [' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') FROM #tmpColNames + WHERE COLUMN_NAME != @pkColName + + SET @SourceSelectColNames = CAST('[' AS varchar(MAX)) + @pkColName + CAST('], ' AS varchar(MAX)) + @SourceSelectColNames -- PRINT @SourceSelectColNames - -- create comma delimited list of columns to be unpivoted, excluding primary key columns and old metadata + -- create comma delimited list of columns to be unpivoted, excluding primary key column DECLARE @UnpivotColNames varchar(MAX) - SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') + SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION) FROM #tmpColNames WHERE - -- primary key column names - -- TODO: copy them out of the #tmpPkColNames so we only need to update constants in one place - COLUMN_NAME != 'SEQN' - AND COLUMN_NAME != 'SAMPLEID' - AND COLUMN_NAME != 'DRXFDCD' - AND COLUMN_NAME != 'DRXMC' - AND COLUMN_NAME != 'POOLID' - -- metadata column names - -- TODO: remove these and test - AND COLUMN_NAME != 'DownloadUrl' - AND COLUMN_NAME != 'Questionnaire' - AND COLUMN_NAME != 'Description' + COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames) -- PRINT @UnpivotColNames @@ -133,19 +158,24 @@ AS -- assemble SQL to pivot the translated table back into the original schema DECLARE @PivotStmt varchar(MAX) SET @PivotStmt = ' - SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ( - SELECT - ' + @pkColName + ', - Variable, - ValueDescription - FROM ' + @TranslatedTempTableName + ' - ) AS SourceTable - PIVOT ( - MAX(ValueDescription) - FOR Variable IN ( - ' + @UnpivotColNames + ' - ) - ) AS PivotTable + WITH PivotTable AS( + SELECT * + FROM ( + SELECT + ' + @pkColName + ', + Variable, + ValueDescription + FROM ' + @TranslatedTempTableName + ' + ) AS SourceTable + PIVOT ( + MAX(ValueDescription) + FOR Variable IN ( + ' + @UnpivotColNames + ' + ) + ) AS PivotTable + ) + SELECT P.*, ' + @NumericSelectColNames + ' INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' + FROM PivotTable P INNER JOIN ' + @SourceTableSchema + '.' + @SourceTableName + ' S ON S.[' + @pkColName + '] = P.[' + @pkColName + '] ' -- PRINT @PivotStmt @@ -165,7 +195,8 @@ AS DECLARE @IndexStmt varchar(8000) SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName - --EXEC('SELECT * FROM ' + @DestinationTableName) + -- EXEC('SELECT TOP 5 * FROM ' + @DestinationTableSchema + '.' + @DestinationTableName + ' ORDER BY SEQN') + -- EXEC('SELECT TOP 5 * FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN') DECLARE @GarbageCollectionStmt varchar(8000) SET @GarbageCollectionStmt = 'DROP TABLE IF EXISTS ' + @UnpivotTempTableName From 40b23d64350bb7941327b330745f15800e79ce9a Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 16:33:39 -0400 Subject: [PATCH 11/21] add debugging tool to run a single table translation --- Code/Sql/RunTableTranslateOnSingleTable.sql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Code/Sql/RunTableTranslateOnSingleTable.sql b/Code/Sql/RunTableTranslateOnSingleTable.sql index 1612f49..f02e280 100644 --- a/Code/Sql/RunTableTranslateOnSingleTable.sql +++ b/Code/Sql/RunTableTranslateOnSingleTable.sql @@ -1,4 +1,4 @@ --- Manually run spTranslateTable +-- Manually run spTranslateTable for debugging DECLARE @RC int DECLARE @SourceTableSchema varchar(8000) @@ -9,7 +9,7 @@ DECLARE @DestinationTableName varchar(128) -- TODO: Set parameter values here. -- CAFE_G SET @SourceTableSchema = 'Raw' -SET @SourceTableName = 'AUX_I' +SET @SourceTableName = 'VARLK_C' SET @DestinationTableSchema = 'Translated' EXECUTE @RC = [dbo].[spTranslateTable] @@ -18,4 +18,7 @@ EXECUTE @RC = [dbo].[spTranslateTable] ,@DestinationTableSchema ,@DestinationTableName = @SourceTableName -EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName) \ No newline at end of file +EXEC ('SELECT TOP 10 * FROM ' + @DestinationTableSchema + '.' + @SourceTableName ) --+ ' ORDER BY SEQN') +EXEC ('SELECT TOP 10 * FROM ' + @SourceTableSchema + '.' + @SourceTableName ) -- + ' ORDER BY SEQN') + +-- SELECT * FROM Metadata.VariableCodebook V WHERE V.TableName='AUX_J' AND (Variable='AUQ011' OR Variable='AUQ540') \ No newline at end of file From 21742ce3a3fd1db404d6b3dedca878df60bec336 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 16:34:11 -0400 Subject: [PATCH 12/21] Handle cases when source tables contain all categorical variables or all numeric variables, or a combination of categorical and numeric variables. --- Code/Sql/spTranslateTable.sql | 125 ++++++++++++++++++++++++++-------- 1 file changed, 97 insertions(+), 28 deletions(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 9f7b943..189b40a 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -1,6 +1,9 @@ /* Stored procedure to translate variable responses in an NHANES questionnaire table */ +DROP PROC IF EXISTS spTranslateTable +GO + CREATE PROC spTranslateTable @SourceTableSchema varchar(MAX), @SourceTableName varchar(MAX), @@ -51,8 +54,9 @@ AS TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema + -- debugging -- SELECT * FROM #tmpColNames - + -- enumerate possible primary key column names DROP TABLE IF EXISTS #tmpPkColNames CREATE TABLE #tmpPkColNames (ColumnName varchar(8000), Priority int) @@ -62,6 +66,13 @@ AS INSERT INTO #tmpPkColNames VALUES ('DRXMC', 4) INSERT INTO #tmpPkColNames VALUES ('POOLID', 5) + -- figure out which primary key column this table has + DECLARE @pkColName varchar(MAX) + SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority + + -- debugging + -- PRINT @pkColName + -- Figure out which columns should remain numeric and not get translated DROP TABLE IF EXISTS #tmpNumericColumns @@ -76,25 +87,42 @@ AS WHERE C.COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames) GROUP BY C.COLUMN_NAME, ORDINAL_POSITION + -- debugging -- SELECT * FROM #tmpNumericColumns -- remove the numeric column names from the table that contains all columns to be translated DELETE FROM #tmpColNames WHERE COLUMN_NAME IN (SELECT COLUMN_NAME FROM #tmpNumericColumns) - -- SELECT * FROM #tmpColNames + -- check whether there are any categorical columns left + DECLARE @categoricalVariableCount INT + SELECT @categoricalVariableCount = COUNT(*) FROM #tmpColNames WHERE COLUMN_NAME != @pkColName + + -- debugging + -- PRINT 'number categorical cols left:' + -- PRINT @categoricalVariableCount + + -- if there are no translatable variables for this table, just copy it over + IF @categoricalVariableCount = 0 + BEGIN + EXEC(' + DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName + ' + SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' + CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName) + + -- we can't do any of the steps below, so just return + RETURN + END -- create comma delimited list of the numeric columns, will be used later to retrieve them from the source table DECLARE @NumericSelectColNames varchar(MAX) SELECT @NumericSelectColNames = STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION) FROM #tmpNumericColumns - - -- figure out which primary key column this table has - DECLARE @pkColName varchar(MAX) - SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority - - -- PRINT @pkColName + -- debugging + -- PRINT 'Numeric cols:' + -- PRINT @NumericSelectColNames + -- create comma delimited list of columns to be selected from the source table, including casting to varchar DECLARE @SourceSelectColNames varchar(MAX) SELECT @@ -104,6 +132,8 @@ AS SET @SourceSelectColNames = CAST('[' AS varchar(MAX)) + @pkColName + CAST('], ' AS varchar(MAX)) + @SourceSelectColNames + -- debugging + -- PRINT 'source select col names:' -- PRINT @SourceSelectColNames -- create comma delimited list of columns to be unpivoted, excluding primary key column @@ -113,6 +143,8 @@ AS WHERE COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames) + -- debugging + -- PRINT 'unpivot col names:' -- PRINT @UnpivotColNames -- assemble dynamic SQL to unpivot the original table @@ -131,6 +163,7 @@ AS ) ) AS unpvt' + -- debugging -- PRINT @unpivotStmt EXEC ('DROP TABLE IF EXISTS ' + @UnpivotTempTableName) EXEC(@unpivotStmt) @@ -152,32 +185,67 @@ AS AND CAST(T.Response AS VARCHAR) = CAST(V.CodeOrValue AS VARCHAR) AND V.TableName = ''' + @SourceTableName + ''' ' + + -- debugging -- PRINT @TranslateStmt EXEC (@TranslateStmt) -- assemble SQL to pivot the translated table back into the original schema DECLARE @PivotStmt varchar(MAX) - SET @PivotStmt = ' - WITH PivotTable AS( - SELECT * - FROM ( - SELECT - ' + @pkColName + ', - Variable, - ValueDescription - FROM ' + @TranslatedTempTableName + ' - ) AS SourceTable - PIVOT ( - MAX(ValueDescription) - FOR Variable IN ( - ' + @UnpivotColNames + ' - ) - ) AS PivotTable - ) - SELECT P.*, ' + @NumericSelectColNames + ' INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' - FROM PivotTable P INNER JOIN ' + @SourceTableSchema + '.' + @SourceTableName + ' S ON S.[' + @pkColName + '] = P.[' + @pkColName + '] - ' + -- if there are numeric columns that need to be merged with the translated categotical columns + IF @NumericSelectColNames IS NOT NULL + BEGIN + -- build the query to include a join against the source table + SET @PivotStmt = ' + WITH PivotTable AS( + SELECT * + FROM ( + SELECT + ' + @pkColName + ', + Variable, + ValueDescription + FROM ' + @TranslatedTempTableName + ' + ) AS SourceTable + PIVOT ( + MAX(ValueDescription) + FOR Variable IN ( + ' + @UnpivotColNames + ' + ) + ) AS PivotTable + ) + SELECT P.*, ' + @NumericSelectColNames + ' INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' + FROM PivotTable P INNER JOIN ' + @SourceTableSchema + '.' + @SourceTableName + ' S ON S.[' + @pkColName + '] = P.[' + @pkColName + '] + ' + END + ELSE + BEGIN + -- since we have no columns to merge from the original source table, just pivot the + -- translated variables and insert into the destination table + SET @PivotStmt = ' + WITH PivotTable AS( + SELECT * + FROM ( + SELECT + ' + @pkColName + ', + Variable, + ValueDescription + FROM ' + @TranslatedTempTableName + ' + ) AS SourceTable + PIVOT ( + MAX(ValueDescription) + FOR Variable IN ( + ' + @UnpivotColNames + ' + ) + ) AS PivotTable + ) + SELECT P.* INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' + FROM PivotTable P + ' + END + + -- debugging + -- PRINT 'Pivot statement:' -- PRINT @PivotStmt EXEC(@PivotStmt) @@ -195,6 +263,7 @@ AS DECLARE @IndexStmt varchar(8000) SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName + -- debugging -- EXEC('SELECT TOP 5 * FROM ' + @DestinationTableSchema + '.' + @DestinationTableName + ' ORDER BY SEQN') -- EXEC('SELECT TOP 5 * FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN') From 30edb3202645a0e5f49cc987226fffe7704b5e71 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 16:40:26 -0400 Subject: [PATCH 13/21] update environmnet variables to prepare for v0.0.4 release --- Container/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Container/Dockerfile b/Container/Dockerfile index 4be165e..8f02fef 100644 --- a/Container/Dockerfile +++ b/Container/Dockerfile @@ -354,8 +354,8 @@ RUN runuser -m -p mssql -c '/opt/mssql/bin/sqlservr &' \ WORKDIR /NHANES # Declare which version of the container this is, and make it available inside the container -ENV EPICONDUCTOR_CONTAINER_VERSION v0.0.3 -ENV EPICONDUCTOR_COLLECTION_DATE 2023-06-13 +ENV EPICONDUCTOR_CONTAINER_VERSION v0.0.4 +ENV EPICONDUCTOR_COLLECTION_DATE 2023-06-28 RUN echo "EPICONDUCTOR_CONTAINER_VERSION=$EPICONDUCTOR_CONTAINER_VERSION" >> /usr/local/lib/R/etc/Renviron RUN echo "EPICONDUCTOR_COLLECTION_DATE=$EPICONDUCTOR_COLLECTION_DATE" >> /usr/local/lib/R/etc/Renviron From 17014e6e1a3a78743f759d7b17fae09963d40207 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 17:10:56 -0400 Subject: [PATCH 14/21] add SHRINKFILE and CHECKPOINT to the end of the script that orchestrates table translation --- Code/R/translateRawTables.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Code/R/translateRawTables.R b/Code/R/translateRawTables.R index 4403233..226ee1f 100644 --- a/Code/R/translateRawTables.R +++ b/Code/R/translateRawTables.R @@ -45,3 +45,8 @@ for (i in 1:nrow(tableList)) { SqlTools::dbSendUpdate(cn, stmt) } +# shrink transaction log +SqlTools::dbSendUpdate(cn, "DBCC SHRINKFILE(NhanesLandingZone_log)") + +# issue checkpoint +SqlTools::dbSendUpdate(cn, "CHECKPOINT") From d66b0c6840d215c372dac918c49822c5d3502ef0 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 17:12:27 -0400 Subject: [PATCH 15/21] fix clustered columnstore index names on both raw and translated tables --- Code/R/download.R | 2 +- Code/Sql/spTranslateTable.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/R/download.R b/Code/R/download.R index bc7ce38..b27c4b1 100644 --- a/Code/R/download.R +++ b/Code/R/download.R @@ -343,7 +343,7 @@ for (i in i:length(dataTypes)) { SqlTools::dbSendUpdate(cn, insertStatement) indexStatement = paste(sep="", - "CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON Raw.", currDataType) + "CREATE CLUSTERED COLUMNSTORE INDEX ccix ON Raw.", currDataType) SqlTools::dbSendUpdate(cn, indexStatement) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 189b40a..3ed6439 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -261,7 +261,7 @@ AS -- create a clustered index on the destination table with compression DECLARE @IndexStmt varchar(8000) - SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName + SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX ccix ON ' + @DestinationTableSchema + '.' + @DestinationTableName -- debugging -- EXEC('SELECT TOP 5 * FROM ' + @DestinationTableSchema + '.' + @DestinationTableName + ' ORDER BY SEQN') From 2b57ed3c71589f868f8a89c8c97b37a530d58c63 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 17:33:02 -0400 Subject: [PATCH 16/21] updated example debug script --- Code/Sql/RunTableTranslateOnSingleTable.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Code/Sql/RunTableTranslateOnSingleTable.sql b/Code/Sql/RunTableTranslateOnSingleTable.sql index f02e280..abc9a07 100644 --- a/Code/Sql/RunTableTranslateOnSingleTable.sql +++ b/Code/Sql/RunTableTranslateOnSingleTable.sql @@ -9,7 +9,7 @@ DECLARE @DestinationTableName varchar(128) -- TODO: Set parameter values here. -- CAFE_G SET @SourceTableSchema = 'Raw' -SET @SourceTableName = 'VARLK_C' +SET @SourceTableName = 'POOLTF_G' SET @DestinationTableSchema = 'Translated' EXECUTE @RC = [dbo].[spTranslateTable] @@ -18,7 +18,7 @@ EXECUTE @RC = [dbo].[spTranslateTable] ,@DestinationTableSchema ,@DestinationTableName = @SourceTableName -EXEC ('SELECT TOP 10 * FROM ' + @DestinationTableSchema + '.' + @SourceTableName ) --+ ' ORDER BY SEQN') -EXEC ('SELECT TOP 10 * FROM ' + @SourceTableSchema + '.' + @SourceTableName ) -- + ' ORDER BY SEQN') +EXEC ('SELECT TOP 10 * FROM ' + @DestinationTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN') +EXEC ('SELECT TOP 10 * FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN') --- SELECT * FROM Metadata.VariableCodebook V WHERE V.TableName='AUX_J' AND (Variable='AUQ011' OR Variable='AUQ540') \ No newline at end of file +-- SELECT * FROM Metadata.VariableCodebook V WHERE V.TableName='POOLTF_G' \ No newline at end of file From fe1465dd5928652565510fb8ac431d507952ad7f Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 17:34:10 -0400 Subject: [PATCH 17/21] fix bug when processing crosswalk tables that map SEQN to SAMPLEID --- Code/Sql/spTranslateTable.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 3ed6439..2818364 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -84,7 +84,7 @@ AS V.TableName = @SourceTableName AND C.COLUMN_NAME = V.Variable AND V.ValueDescription = 'Range of Values' - WHERE C.COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames) + WHERE C.COLUMN_NAME != @pkColName GROUP BY C.COLUMN_NAME, ORDINAL_POSITION -- debugging From 19c285292d55110c837016e9415ecaca827a3c9f Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 20:22:07 -0400 Subject: [PATCH 18/21] removed DROP PROC... GO from Code/Sql/spTranslateTable.sql --- Code/Sql/spTranslateTable.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 2818364..2a5f6cf 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -1,8 +1,6 @@ /* Stored procedure to translate variable responses in an NHANES questionnaire table */ -DROP PROC IF EXISTS spTranslateTable -GO CREATE PROC spTranslateTable @SourceTableSchema varchar(MAX), From 6b4232222fa0463afe96bafd0b5ee130eff121f7 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 20:31:23 -0400 Subject: [PATCH 19/21] Exclude only the PK column from the list of columns to unpivot. This wasn't causeing any issues, since the FKs are numeric and wouldn't end up in the unpivot list, but it was technically not correct. --- Code/Sql/spTranslateTable.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql index 2a5f6cf..69a8e0e 100644 --- a/Code/Sql/spTranslateTable.sql +++ b/Code/Sql/spTranslateTable.sql @@ -139,7 +139,7 @@ AS SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION) FROM #tmpColNames WHERE - COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames) + COLUMN_NAME != @pkColName -- debugging -- PRINT 'unpivot col names:' From 5c12b62256c3159f384321c24fc0202973157e26 Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 21:05:02 -0400 Subject: [PATCH 20/21] Remove ReleaseNotes_v0_0_3.txt. If we want release notes, they should all be in one file, cronologically. --- ReleaseNotes_v0_0_3.txt | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 ReleaseNotes_v0_0_3.txt diff --git a/ReleaseNotes_v0_0_3.txt b/ReleaseNotes_v0_0_3.txt deleted file mode 100644 index b53ca8f..0000000 --- a/ReleaseNotes_v0_0_3.txt +++ /dev/null @@ -1,30 +0,0 @@ -################################################# -# Version 0.0.3 Release Notes # -################################################# - -#The following tables/columns are expected in the db build - -[NhanesLandingZone].[Metadata].[DownloadErrors] : DataType, FileUrl, Error -[NhanesLandingZone].[Metadata].[QuestionnaireDescriptions] : Description, DataGroup, TableName -[NhanesLandingZone].[Metadata].[QuestionnaireVariables] : Variable, TableName, Description, Target, SasLabel -[NhanesLandingZone].[Metadata].[VariableCodebook] : Variable, TableName, CodeOrValue, ValueDescription, Count, Cumulative, SkipToItem - -[NhanesLandingZone].[Ontology].[dbxrefs] : Subject, Object, Ontology -[NhanesLandingZone].[Ontology].[edges] : Subject, Object, Ontology -[NhanesLandingZone].[Ontology].[entailed_edges] : Subject, Object, Ontology -[NhanesLandingZone].[Ontology].[labels] : Subject, Object, IRI, Ontology, Direct, Inherited -[NhanesLandingZone].[Ontology].[nhanes_variables_mappings] : Variable, TableName, SourceTermID, SourceTerm, MappedTempLabel, MappedTermCURIE, MappedTermIRI, MappingScore, Tags, Ontology - -#Sample Queries, and expected outputs: - -Query: SELECT * FROM [NhanesLandingZone].[Metadata].[QuestionnaireVariables] -Expected Output: Around 42,000 rows, NULLS expected for Variable='SEQN' in 'Target' column - -Query: -Expected Output: - -Query: -Expected Output: - -Query: -Expected Output: \ No newline at end of file From de059f75036ba20f1807ad791215fe9682cfecbf Mon Sep 17 00:00:00 2001 From: Nathan Palmer Date: Wed, 28 Jun 2023 22:19:15 -0400 Subject: [PATCH 21/21] change reference to version number in preparation for 0.0.4 release --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fb966db..2f110c5 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ docker \ -e 'CONTAINER_USER_PASSWORD=PASSWORD' \ -e 'ACCEPT_EULA=Y' \ -e 'SA_PASSWORD=yourStrong(!)Password' \ - hmsccb/nhanes-workbench:latest + hmsccb/nhanes-workbench:hmsccb/nhanes-workbench:version-0.0.4 ``` ### Parameters