From 4b9dd06a49119545cf3f0db13d45dbf0f9766159 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Wed, 21 Jun 2023 22:13:50 -0400
Subject: [PATCH 01/21] cast all columns in the UNPIVOT statement to
 varchar(256) to prevent conflicting column types from causing the UNPIVOT to
 fail

---
 Code/Sql/spTranslateTable.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index cbdd98e..a02ce65 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -29,9 +29,9 @@ AS
     FROM INFORMATION_SCHEMA.COLUMNS
     WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema
 
-    -- create comma delimited list of columns to be selected from the source table
+    -- create comma delimited list of columns to be selected from the source table, including casting to varchar
     DECLARE @SourceSelectColNames varchar(8000)
-    SELECT @SourceSelectColNames=STRING_AGG('[' + COLUMN_NAME + ']', ', ') 
+    SELECT @SourceSelectColNames=STRING_AGG('CAST ([' + COLUMN_NAME + '] AS varchar(256)) AS ' + COLUMN_NAME, ', ') 
     FROM #tmpColNames
 
     -- PRINT @SourceSelectColNames

From 6c9a7092599bd4b42b5ecddd50737b5b6f02406b Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Thu, 22 Jun 2023 17:15:39 -0400
Subject: [PATCH 02/21] fixes STRING_AGG varchar overflow

---
 Code/Sql/spTranslateTable.sql | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index a02ce65..6422838 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -1,7 +1,6 @@
     /*
         Stored procedure to translate variable responses in an NHANES questionnaire table
     */
-
 CREATE PROC spTranslateTable 
     @SourceTableSchema varchar(8000),
     @SourceTableName varchar(128),
@@ -28,17 +27,31 @@ AS
     INTO #tmpColNames
     FROM INFORMATION_SCHEMA.COLUMNS
     WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema
+    
+    -- figure out whether this table has a SEQN or SAMPLEID primary key
+    DECLARE @pkColName varchar(256)
+    DECLARE @flag int
+    SELECT @flag = COUNT(1) FROM #tmpColNames WHERE COLUMN_NAME='SEQN'
+
+    IF @flag = 1
+        SET @pkColName = 'SEQN'
+    ELSE
+        SET @pkColName = 'SAMPLEID'
+    
+    PRINT @pkColName
+    
+    -- TODO: go through the rest of the SPROC and replace SEQN constants with @pkColName
 
     -- create comma delimited list of columns to be selected from the source table, including casting to varchar
-    DECLARE @SourceSelectColNames varchar(8000)
-    SELECT @SourceSelectColNames=STRING_AGG('CAST ([' + COLUMN_NAME + '] AS varchar(256)) AS ' + COLUMN_NAME, ', ') 
+    DECLARE @SourceSelectColNames varchar(MAX)
+    SELECT @SourceSelectColNames = STRING_AGG(CAST('CAST([' + COLUMN_NAME + '] AS varchar(MAX)) AS [' + COLUMN_NAME + ']' AS varchar(MAX)), ', ')
     FROM #tmpColNames
 
     -- PRINT @SourceSelectColNames
 
     -- create comma delimited list of columns to be unpivoted
-    DECLARE @UnpivotColNames varchar(8000)
-    SELECT @UnpivotColNames=STRING_AGG('[' + COLUMN_NAME + ']', ', ') 
+    DECLARE @UnpivotColNames varchar(MAX)
+    SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') 
     FROM #tmpColNames
     WHERE 
         COLUMN_NAME != 'SEQN'
@@ -49,7 +62,7 @@ AS
     -- PRINT @UnpivotColNames
 
     -- assemble dynamic SQL to unpivot the original table
-    DECLARE @unpivotStmt varchar(8000)
+    DECLARE @unpivotStmt varchar(MAX)
     SET @unpivotStmt = '
         SELECT SEQN, Variable, Response 
         INTO ' + @UnpivotTempTableName + '
@@ -70,7 +83,7 @@ AS
 
     -- assemble SQL to join the unpivoted table to the variable codebook
     -- to decode the responses
-    DECLARE @TranslateStmt varchar(8000)
+    DECLARE @TranslateStmt varchar(MAX)
     SET @TranslateStmt = '
         SELECT 
             T.SEQN,
@@ -89,7 +102,7 @@ AS
     EXEC (@TranslateStmt)
 
     -- assemble SQL to pivot the translated table back into the original schema
-    DECLARE @PivotStmt varchar(8000)
+    DECLARE @PivotStmt varchar(MAX)
     SET @PivotStmt = '
         SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM (
             SELECT 

From 1437bbc4a56f94496c57e9912f8aae0c082f30a9 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Thu, 22 Jun 2023 21:19:21 -0400
Subject: [PATCH 03/21] Accommodate NHANES tables being translated by
 Code/Sql/spTranslateTable.sql in the Raw schema where the data is from pooled
 samples and the primary key is SAMPLEID rather than SEQN.

---
 Code/Sql/spTranslateTable.sql | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 6422838..f16bd6d 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -1,11 +1,11 @@
-    /*
-        Stored procedure to translate variable responses in an NHANES questionnaire table
-    */
+/*
+    Stored procedure to translate variable responses in an NHANES questionnaire table
+*/
 CREATE PROC spTranslateTable 
-    @SourceTableSchema varchar(8000),
-    @SourceTableName varchar(128),
-    @DestinationTableSchema varchar(8000),
-    @DestinationTableName varchar(128)
+    @SourceTableSchema varchar(MAX),
+    @SourceTableName varchar(MAX),
+    @DestinationTableSchema varchar(MAX),
+    @DestinationTableName varchar(MAX)
 AS
 
     -- drop the destination table if it exists
@@ -55,6 +55,7 @@ AS
     FROM #tmpColNames
     WHERE 
         COLUMN_NAME != 'SEQN'
+        AND COLUMN_NAME != 'SAMPLEID'
         AND COLUMN_NAME != 'DownloadUrl'
         AND COLUMN_NAME != 'Questionnaire'
         AND COLUMN_NAME != 'Description'
@@ -64,7 +65,7 @@ AS
     -- assemble dynamic SQL to unpivot the original table
     DECLARE @unpivotStmt varchar(MAX)
     SET @unpivotStmt = '
-        SELECT SEQN, Variable, Response 
+        SELECT ' + @pkColName + ', Variable, Response 
         INTO ' + @UnpivotTempTableName + '
         FROM (
             SELECT 
@@ -86,7 +87,7 @@ AS
     DECLARE @TranslateStmt varchar(MAX)
     SET @TranslateStmt = '
         SELECT 
-            T.SEQN,
+            T.' + @pkColName + ',
             T.Variable,
             COALESCE(CAST(V.ValueDescription AS VARCHAR), CAST(T.Response AS VARCHAR)) AS ValueDescription
         INTO 
@@ -106,7 +107,7 @@ AS
     SET @PivotStmt = '
         SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM (
             SELECT 
-                SEQN, 
+                ' + @pkColName + ', 
                 Variable, 
                 ValueDescription 
             FROM ' + @TranslatedTempTableName + '
@@ -125,8 +126,8 @@ AS
     -- insert rows where all of the variable responses were NULL in the original data
     DECLARE @InsertNullStmt varchar(8000)
     SET @InsertNullStmt = '
-        INSERT INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' (SEQN)
-        SELECT A.SEQN FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' A LEFT OUTER JOIN ' + @DestinationTableSchema + '.' + @DestinationTableName + ' T ON A.SEQN = T.SEQN WHERE T.SEQN IS NULL
+        INSERT INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' (' + @pkColName + ')
+        SELECT A.' + @pkColName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' A LEFT OUTER JOIN ' + @DestinationTableSchema + '.' + @DestinationTableName + ' T ON A.' + @pkColName + ' = T.' + @pkColName + ' WHERE T.' + @pkColName + ' IS NULL
     '
 
     -- PRINT @InsertNullStmt

From 6dc2b5707e15d36dae80878b49b41f2f9c034332 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Thu, 22 Jun 2023 21:35:50 -0400
Subject: [PATCH 04/21] add debugging scripts

---
 .../RerunTableTranslateOnMissingTables.sql    | 32 +++++++++++++++++++
 Code/Sql/RunTableTranslateOnSingleTable       | 21 ++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 Code/Sql/RerunTableTranslateOnMissingTables.sql
 create mode 100644 Code/Sql/RunTableTranslateOnSingleTable

diff --git a/Code/Sql/RerunTableTranslateOnMissingTables.sql b/Code/Sql/RerunTableTranslateOnMissingTables.sql
new file mode 100644
index 0000000..715ab40
--- /dev/null
+++ b/Code/Sql/RerunTableTranslateOnMissingTables.sql
@@ -0,0 +1,32 @@
+-- loop over all missing tables in the last release
+ SELECT TABLE_NAME
+ INTO #tmpRawTables
+    FROM INFORMATION_SCHEMA.TABLES
+    WHERE TABLE_SCHEMA = 'Raw'
+
+ SELECT TABLE_NAME
+ INTO #tmpTranslatedTables
+    FROM INFORMATION_SCHEMA.TABLES
+    WHERE TABLE_SCHEMA = 'Translated'
+
+DROP TABLE IF EXISTS #tmpDebugTables
+SELECT R.TABLE_NAME INTO #tmpDebugTables FROM #tmpRawTables R LEFT OUTER JOIN #tmpTranslatedTables T ON R.TABLE_NAME = T.TABLE_NAME WHERE T.TABLE_NAME IS NULL ORDER BY R.TABLE_NAME
+
+DECLARE cTableNames CURSOR FOR SELECT * FROM #tmpDebugTables ORDER BY TABLE_NAME
+DECLARE @currTableName varchar(256)
+OPEN cTableNames
+
+FETCH NEXT FROM cTableNames INTO @currTableName
+WHILE @@FETCH_STATUS = 0  
+BEGIN
+    PRINT @currTableName
+    EXECUTE dbo.spTranslateTable
+        'Raw'
+        ,@currTableName
+        ,'Translated'
+        ,@currTableName
+    FETCH NEXT FROM cTableNames INTO @currTableName
+END
+
+CLOSE cTableNames
+DEALLOCATE cTableNames
\ No newline at end of file
diff --git a/Code/Sql/RunTableTranslateOnSingleTable b/Code/Sql/RunTableTranslateOnSingleTable
new file mode 100644
index 0000000..cb794fa
--- /dev/null
+++ b/Code/Sql/RunTableTranslateOnSingleTable
@@ -0,0 +1,21 @@
+-- Manually run 
+
+DECLARE @RC int
+DECLARE @SourceTableSchema varchar(8000)
+DECLARE @SourceTableName varchar(128)
+DECLARE @DestinationTableSchema varchar(8000)
+DECLARE @DestinationTableName varchar(128)
+
+-- TODO: Set parameter values here.
+-- CAFE_G
+SET @SourceTableSchema  = 'Raw'
+SET @SourceTableName  = 'AUX_I'
+SET @DestinationTableSchema  = 'Translated'
+
+EXECUTE @RC = [dbo].[spTranslateTable] 
+   @SourceTableSchema
+  ,@SourceTableName
+  ,@DestinationTableSchema
+  ,@DestinationTableName = @SourceTableName
+
+EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName)  
\ No newline at end of file

From c282b344f8a78deb7b007308ea231dc22e7c7590 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Thu, 22 Jun 2023 21:40:36 -0400
Subject: [PATCH 05/21] fixed comment

---
 Code/Sql/RunTableTranslateOnSingleTable.sql | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Code/Sql/RunTableTranslateOnSingleTable.sql

diff --git a/Code/Sql/RunTableTranslateOnSingleTable.sql b/Code/Sql/RunTableTranslateOnSingleTable.sql
new file mode 100644
index 0000000..1612f49
--- /dev/null
+++ b/Code/Sql/RunTableTranslateOnSingleTable.sql
@@ -0,0 +1,21 @@
+-- Manually run spTranslateTable
+
+DECLARE @RC int
+DECLARE @SourceTableSchema varchar(8000)
+DECLARE @SourceTableName varchar(128)
+DECLARE @DestinationTableSchema varchar(8000)
+DECLARE @DestinationTableName varchar(128)
+
+-- TODO: Set parameter values here.
+-- CAFE_G
+SET @SourceTableSchema  = 'Raw'
+SET @SourceTableName  = 'AUX_I'
+SET @DestinationTableSchema  = 'Translated'
+
+EXECUTE @RC = [dbo].[spTranslateTable] 
+   @SourceTableSchema
+  ,@SourceTableName
+  ,@DestinationTableSchema
+  ,@DestinationTableName = @SourceTableName
+
+EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName)  
\ No newline at end of file

From 25d38b44c4b41fe4ca511003e308249ce497c318 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Thu, 22 Jun 2023 21:51:41 -0400
Subject: [PATCH 06/21] old delete

---
 Code/Sql/RunTableTranslateOnSingleTable | 21 ---------------------
 1 file changed, 21 deletions(-)
 delete mode 100644 Code/Sql/RunTableTranslateOnSingleTable

diff --git a/Code/Sql/RunTableTranslateOnSingleTable b/Code/Sql/RunTableTranslateOnSingleTable
deleted file mode 100644
index cb794fa..0000000
--- a/Code/Sql/RunTableTranslateOnSingleTable
+++ /dev/null
@@ -1,21 +0,0 @@
--- Manually run 
-
-DECLARE @RC int
-DECLARE @SourceTableSchema varchar(8000)
-DECLARE @SourceTableName varchar(128)
-DECLARE @DestinationTableSchema varchar(8000)
-DECLARE @DestinationTableName varchar(128)
-
--- TODO: Set parameter values here.
--- CAFE_G
-SET @SourceTableSchema  = 'Raw'
-SET @SourceTableName  = 'AUX_I'
-SET @DestinationTableSchema  = 'Translated'
-
-EXECUTE @RC = [dbo].[spTranslateTable] 
-   @SourceTableSchema
-  ,@SourceTableName
-  ,@DestinationTableSchema
-  ,@DestinationTableName = @SourceTableName
-
-EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName)  
\ No newline at end of file

From 692a94ac62376127452418ba1a5441862e54ceeb Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Thu, 22 Jun 2023 22:01:00 -0400
Subject: [PATCH 07/21] remove debugging statements

---
 Code/Sql/spTranslateTable.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index f16bd6d..f3b1572 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -38,7 +38,7 @@ AS
     ELSE
         SET @pkColName = 'SAMPLEID'
     
-    PRINT @pkColName
+    -- PRINT @pkColName
     
     -- TODO: go through the rest of the SPROC and replace SEQN constants with @pkColName
 

From 5f4b4e02e5c6d9d006eea5ec203aaf3b9db26981 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Fri, 23 Jun 2023 15:40:26 -0400
Subject: [PATCH 08/21] allows passthrough of untranslate-able lookup tables
 and identified various other primary keys that are used instead of SEQN

---
 .../RerunTableTranslateOnMissingTables.sql    | 10 ++++
 Code/Sql/spTranslateTable.sql                 | 52 ++++++++++++++-----
 2 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/Code/Sql/RerunTableTranslateOnMissingTables.sql b/Code/Sql/RerunTableTranslateOnMissingTables.sql
index 715ab40..7ec712b 100644
--- a/Code/Sql/RerunTableTranslateOnMissingTables.sql
+++ b/Code/Sql/RerunTableTranslateOnMissingTables.sql
@@ -1,3 +1,11 @@
+/*
+    This is a debugging tool to help identify tables that fail to translate and re-run 
+    their translations once a bugfix has been proposed.
+*/
+
+DROP TABLE IF EXISTS #tmpRawTables
+DROP TABLE IF EXISTS #tmpTranslatedTables
+
 -- loop over all missing tables in the last release
  SELECT TABLE_NAME
  INTO #tmpRawTables
@@ -12,6 +20,8 @@
 DROP TABLE IF EXISTS #tmpDebugTables
 SELECT R.TABLE_NAME INTO #tmpDebugTables FROM #tmpRawTables R LEFT OUTER JOIN #tmpTranslatedTables T ON R.TABLE_NAME = T.TABLE_NAME WHERE T.TABLE_NAME IS NULL ORDER BY R.TABLE_NAME
 
+SELECT * FROM #tmpDebugTables
+
 DECLARE cTableNames CURSOR FOR SELECT * FROM #tmpDebugTables ORDER BY TABLE_NAME
 DECLARE @currTableName varchar(256)
 OPEN cTableNames
diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index f3b1572..7c44bc8 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -8,6 +8,26 @@ CREATE PROC spTranslateTable
     @DestinationTableName varchar(MAX)
 AS
 
+    -- check that the variable codebook actually has data for this table
+    DECLARE @variableTranslationCount INT
+    SELECT @variableTranslationCount = COUNT(*) FROM Metadata.VariableCodebook C WHERE C.TableName = @SourceTableName
+    
+    -- if there are no translatable variables for this table, just copy it over
+    IF @variableTranslationCount = 0
+        BEGIN
+        
+            PRINT 'There are no variables available to translate for ' + @SourceTableSchema + '.' + @SourceTableName + '.  Copying table as-is.'
+            DECLARE @CopyTableStatement varchar(MAX)
+            SET @CopyTableStatement = '
+                DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName + '
+                SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + '
+                CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName
+        
+            EXEC(@CopyTableStatement)
+            -- we can't do any of the steps below, so just return
+            RETURN
+        END
+    
     -- drop the destination table if it exists
     DECLARE @DropDestinationStmt varchar(8000)
     SET @DropDestinationStmt = 'DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName
@@ -28,19 +48,20 @@ AS
     FROM INFORMATION_SCHEMA.COLUMNS
     WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema
     
-    -- figure out whether this table has a SEQN or SAMPLEID primary key
+    -- figure out which primary key column this table has
+    DROP TABLE IF EXISTS #tmpPkColNames
+    CREATE TABLE #tmpPkColNames (ColumnName varchar(8000), Priority int)
+
+    INSERT INTO #tmpPkColNames VALUES ('SEQN', 1)
+    INSERT INTO #tmpPkColNames VALUES ('SAMPLEID', 2)
+    INSERT INTO #tmpPkColNames VALUES ('DRXFDCD', 3)
+    INSERT INTO #tmpPkColNames VALUES ('DRXMC', 4)
+    INSERT INTO #tmpPkColNames VALUES ('POOLID', 4)
+    
     DECLARE @pkColName varchar(256)
-    DECLARE @flag int
-    SELECT @flag = COUNT(1) FROM #tmpColNames WHERE COLUMN_NAME='SEQN'
+    SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority
 
-    IF @flag = 1
-        SET @pkColName = 'SEQN'
-    ELSE
-        SET @pkColName = 'SAMPLEID'
-    
-    -- PRINT @pkColName
-    
-    -- TODO: go through the rest of the SPROC and replace SEQN constants with @pkColName
+    PRINT @pkColName
 
     -- create comma delimited list of columns to be selected from the source table, including casting to varchar
     DECLARE @SourceSelectColNames varchar(MAX)
@@ -49,13 +70,20 @@ AS
 
     -- PRINT @SourceSelectColNames
 
-    -- create comma delimited list of columns to be unpivoted
+    -- create comma delimited list of columns to be unpivoted, excluding primary key columns and old metadata
     DECLARE @UnpivotColNames varchar(MAX)
     SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') 
     FROM #tmpColNames
     WHERE 
+        -- primary key column names
+        -- TODO: copy them out of the #tmpPkColNames so we only need to update constants in one place
         COLUMN_NAME != 'SEQN'
         AND COLUMN_NAME != 'SAMPLEID'
+        AND COLUMN_NAME != 'DRXFDCD'
+        AND COLUMN_NAME != 'DRXMC'
+        AND COLUMN_NAME != 'POOLID'
+        -- metadata column names
+        -- TODO: remove these and test
         AND COLUMN_NAME != 'DownloadUrl'
         AND COLUMN_NAME != 'Questionnaire'
         AND COLUMN_NAME != 'Description'

From 720274606c63957e4e16bc9dc6e43c8719f7638b Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Tue, 27 Jun 2023 17:12:42 -0400
Subject: [PATCH 09/21] preserves INT column types on additional column names
 that appear as primary keys: SAMPLEID, DRXFDCD, DRXMC, POOLID

---
 Code/R/download.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Code/R/download.R b/Code/R/download.R
index 925130b..bc7ce38 100644
--- a/Code/R/download.R
+++ b/Code/R/download.R
@@ -318,8 +318,12 @@ for (i in i:length(dataTypes)) {
       # change DOUBLE to float
       createTableQuery = gsub(createTableQuery, pattern = "\" DOUBLE", replace = "\" float", fixed = TRUE)
 
-      # we know that SEQN should always be an INT
+      # we know that SEQN and other primary keys should always be an INT
       createTableQuery = gsub(createTableQuery, pattern = "\"SEQN\" float", replace = "\"SEQN\" INT", fixed = TRUE) # nolint
+      createTableQuery = gsub(createTableQuery, pattern = "\"SAMPLEID\" float", replace = "\"SAMPLEID\" INT", fixed = TRUE) # nolint
+      createTableQuery = gsub(createTableQuery, pattern = "\"DRXFDCD\" float", replace = "\"DRXFDCD\" INT", fixed = TRUE) # nolint
+      createTableQuery = gsub(createTableQuery, pattern = "\"DRXMC\" float", replace = "\"DRXMC\" INT", fixed = TRUE) # nolint
+      createTableQuery = gsub(createTableQuery, pattern = "\"POOLID\" float", replace = "\"POOLID\" INT", fixed = TRUE) # nolint
 
       # remove double quotes, which interferes with the schema specification
       createTableQuery = gsub(createTableQuery, pattern = '"', replace = "", fixed = TRUE)

From 6d0e82302e582c823ec6c64340853897194ad267 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Tue, 27 Jun 2023 17:14:29 -0400
Subject: [PATCH 10/21] Multiple changes to address #51, attempt to only
 translate categorical variables and "pass-through" the variables whose
 ValueDescription matches string 'Range of Values', indicating numeric type.

---
 Code/Sql/spTranslateTable.sql | 105 ++++++++++++++++++++++------------
 1 file changed, 68 insertions(+), 37 deletions(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 7c44bc8..9f7b943 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -8,6 +8,7 @@ CREATE PROC spTranslateTable
     @DestinationTableName varchar(MAX)
 AS
 
+    
     -- check that the variable codebook actually has data for this table
     DECLARE @variableTranslationCount INT
     SELECT @variableTranslationCount = COUNT(*) FROM Metadata.VariableCodebook C WHERE C.TableName = @SourceTableName
@@ -22,8 +23,8 @@ AS
                 DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName + '
                 SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + '
                 CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName
-        
             EXEC(@CopyTableStatement)
+            
             -- we can't do any of the steps below, so just return
             RETURN
         END
@@ -43,50 +44,74 @@ AS
 
     -- get all column names of the source table from the information schema
     DROP TABLE IF EXISTS #tmpColNames
-    SELECT COLUMN_NAME
+    SELECT COLUMN_NAME, ORDINAL_POSITION
     INTO #tmpColNames
     FROM INFORMATION_SCHEMA.COLUMNS
-    WHERE TABLE_NAME = @SourceTableName AND TABLE_SCHEMA = @SourceTableSchema
+    WHERE 
+        TABLE_NAME = @SourceTableName 
+        AND TABLE_SCHEMA = @SourceTableSchema
+    
+    -- SELECT * FROM #tmpColNames
     
-    -- figure out which primary key column this table has
+    -- enumerate possible primary key column names
     DROP TABLE IF EXISTS #tmpPkColNames
     CREATE TABLE #tmpPkColNames (ColumnName varchar(8000), Priority int)
-
     INSERT INTO #tmpPkColNames VALUES ('SEQN', 1)
     INSERT INTO #tmpPkColNames VALUES ('SAMPLEID', 2)
     INSERT INTO #tmpPkColNames VALUES ('DRXFDCD', 3)
     INSERT INTO #tmpPkColNames VALUES ('DRXMC', 4)
-    INSERT INTO #tmpPkColNames VALUES ('POOLID', 4)
+    INSERT INTO #tmpPkColNames VALUES ('POOLID', 5)
+
+    -- Figure out which columns should remain numeric and not get translated
+    DROP TABLE IF EXISTS #tmpNumericColumns
+
+    SELECT C.COLUMN_NAME, ORDINAL_POSITION
+    INTO #tmpNumericColumns
+    FROM 
+        #tmpColNames C 
+        INNER JOIN Metadata.VariableCodebook V ON
+            V.TableName = @SourceTableName
+            AND C.COLUMN_NAME = V.Variable
+            AND V.ValueDescription = 'Range of Values'
+    WHERE C.COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames)
+    GROUP BY C.COLUMN_NAME, ORDINAL_POSITION
+
+    -- SELECT * FROM #tmpNumericColumns
+
+    -- remove the numeric column names from the table that contains all columns to be translated
+    DELETE FROM #tmpColNames WHERE COLUMN_NAME IN (SELECT COLUMN_NAME FROM #tmpNumericColumns)
+
+    -- SELECT * FROM #tmpColNames
+
+     -- create comma delimited list of the numeric columns, will be used later to retrieve them from the source table
+    DECLARE @NumericSelectColNames varchar(MAX)
+    SELECT 
+        @NumericSelectColNames = STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION)
+    FROM #tmpNumericColumns
     
-    DECLARE @pkColName varchar(256)
+    -- figure out which primary key column this table has    
+    DECLARE @pkColName varchar(MAX)
     SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority
 
-    PRINT @pkColName
+    -- PRINT @pkColName
 
     -- create comma delimited list of columns to be selected from the source table, including casting to varchar
     DECLARE @SourceSelectColNames varchar(MAX)
-    SELECT @SourceSelectColNames = STRING_AGG(CAST('CAST([' + COLUMN_NAME + '] AS varchar(MAX)) AS [' + COLUMN_NAME + ']' AS varchar(MAX)), ', ')
+    SELECT 
+        @SourceSelectColNames = STRING_AGG(CAST('CAST([' + COLUMN_NAME + '] AS varchar(MAX)) AS [' + COLUMN_NAME + ']' AS varchar(MAX)), ', ')
     FROM #tmpColNames
+    WHERE COLUMN_NAME != @pkColName
+
+    SET @SourceSelectColNames = CAST('[' AS varchar(MAX)) + @pkColName + CAST('], ' AS varchar(MAX)) + @SourceSelectColNames
 
     -- PRINT @SourceSelectColNames
 
-    -- create comma delimited list of columns to be unpivoted, excluding primary key columns and old metadata
+    -- create comma delimited list of columns to be unpivoted, excluding primary key column
     DECLARE @UnpivotColNames varchar(MAX)
-    SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') 
+    SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION)
     FROM #tmpColNames
     WHERE 
-        -- primary key column names
-        -- TODO: copy them out of the #tmpPkColNames so we only need to update constants in one place
-        COLUMN_NAME != 'SEQN'
-        AND COLUMN_NAME != 'SAMPLEID'
-        AND COLUMN_NAME != 'DRXFDCD'
-        AND COLUMN_NAME != 'DRXMC'
-        AND COLUMN_NAME != 'POOLID'
-        -- metadata column names
-        -- TODO: remove these and test
-        AND COLUMN_NAME != 'DownloadUrl'
-        AND COLUMN_NAME != 'Questionnaire'
-        AND COLUMN_NAME != 'Description'
+        COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames)
 
     -- PRINT @UnpivotColNames
 
@@ -133,19 +158,24 @@ AS
     -- assemble SQL to pivot the translated table back into the original schema
     DECLARE @PivotStmt varchar(MAX)
     SET @PivotStmt = '
-        SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM (
-            SELECT 
-                ' + @pkColName + ', 
-                Variable, 
-                ValueDescription 
-            FROM ' + @TranslatedTempTableName + '
-        ) AS SourceTable
-        PIVOT (
-            MAX(ValueDescription)
-            FOR Variable IN (
-                    ' + @UnpivotColNames + '
-                ) 
-        ) AS PivotTable
+        WITH PivotTable AS(
+            SELECT * 
+            FROM (
+                SELECT 
+                    ' + @pkColName + ', 
+                    Variable, 
+                    ValueDescription 
+                FROM ' + @TranslatedTempTableName + '
+            ) AS SourceTable
+            PIVOT (
+                MAX(ValueDescription)
+                FOR Variable IN (
+                        ' + @UnpivotColNames + '
+                    ) 
+            ) AS PivotTable
+        )
+        SELECT P.*, ' + @NumericSelectColNames + ' INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' 
+        FROM PivotTable P INNER JOIN ' + @SourceTableSchema + '.' + @SourceTableName + ' S ON S.[' + @pkColName + '] = P.[' + @pkColName + ']
     '
 
     -- PRINT @PivotStmt
@@ -165,7 +195,8 @@ AS
     DECLARE @IndexStmt varchar(8000)
     SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName
 
-    --EXEC('SELECT * FROM ' + @DestinationTableName)
+    -- EXEC('SELECT TOP 5 * FROM ' + @DestinationTableSchema + '.' + @DestinationTableName + ' ORDER BY SEQN')
+    -- EXEC('SELECT TOP 5 * FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN')
 
     DECLARE @GarbageCollectionStmt varchar(8000)
     SET @GarbageCollectionStmt = 'DROP TABLE IF EXISTS ' + @UnpivotTempTableName

From 40b23d64350bb7941327b330745f15800e79ce9a Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Wed, 28 Jun 2023 16:33:39 -0400
Subject: [PATCH 11/21] add debugging tool to run a single table translation

---
 Code/Sql/RunTableTranslateOnSingleTable.sql | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Code/Sql/RunTableTranslateOnSingleTable.sql b/Code/Sql/RunTableTranslateOnSingleTable.sql
index 1612f49..f02e280 100644
--- a/Code/Sql/RunTableTranslateOnSingleTable.sql
+++ b/Code/Sql/RunTableTranslateOnSingleTable.sql
@@ -1,4 +1,4 @@
--- Manually run spTranslateTable
+-- Manually run spTranslateTable for debugging
 
 DECLARE @RC int
 DECLARE @SourceTableSchema varchar(8000)
@@ -9,7 +9,7 @@ DECLARE @DestinationTableName varchar(128)
 -- TODO: Set parameter values here.
 -- CAFE_G
 SET @SourceTableSchema  = 'Raw'
-SET @SourceTableName  = 'AUX_I'
+SET @SourceTableName  = 'VARLK_C'
 SET @DestinationTableSchema  = 'Translated'
 
 EXECUTE @RC = [dbo].[spTranslateTable] 
@@ -18,4 +18,7 @@ EXECUTE @RC = [dbo].[spTranslateTable]
   ,@DestinationTableSchema
   ,@DestinationTableName = @SourceTableName
 
-EXEC ('SELECT * FROM ' + @DestinationTableSchema + '.' + @SourceTableName)  
\ No newline at end of file
+EXEC ('SELECT TOP 10 * FROM ' + @DestinationTableSchema + '.' + @SourceTableName ) --+ ' ORDER BY SEQN')
+EXEC ('SELECT TOP 10 * FROM ' + @SourceTableSchema + '.' + @SourceTableName ) -- + ' ORDER BY SEQN')
+
+-- SELECT * FROM Metadata.VariableCodebook V WHERE V.TableName='AUX_J' AND (Variable='AUQ011' OR Variable='AUQ540')
\ No newline at end of file

From 21742ce3a3fd1db404d6b3dedca878df60bec336 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Wed, 28 Jun 2023 16:34:11 -0400
Subject: [PATCH 12/21] Handle cases when source tables contain all categorical
 variables or all numeric variables, or a combination of categorical and
 numeric variables.

---
 Code/Sql/spTranslateTable.sql | 125 ++++++++++++++++++++++++++--------
 1 file changed, 97 insertions(+), 28 deletions(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 9f7b943..189b40a 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -1,6 +1,9 @@
 /*
     Stored procedure to translate variable responses in an NHANES questionnaire table
 */
+DROP PROC IF EXISTS spTranslateTable
+GO
+
 CREATE PROC spTranslateTable 
     @SourceTableSchema varchar(MAX),
     @SourceTableName varchar(MAX),
@@ -51,8 +54,9 @@ AS
         TABLE_NAME = @SourceTableName 
         AND TABLE_SCHEMA = @SourceTableSchema
     
+    -- debugging
     -- SELECT * FROM #tmpColNames
-    
+
     -- enumerate possible primary key column names
     DROP TABLE IF EXISTS #tmpPkColNames
     CREATE TABLE #tmpPkColNames (ColumnName varchar(8000), Priority int)
@@ -62,6 +66,13 @@ AS
     INSERT INTO #tmpPkColNames VALUES ('DRXMC', 4)
     INSERT INTO #tmpPkColNames VALUES ('POOLID', 5)
 
+    -- figure out which primary key column this table has
+    DECLARE @pkColName varchar(MAX)
+    SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority
+
+    -- debugging
+    -- PRINT @pkColName
+
     -- Figure out which columns should remain numeric and not get translated
     DROP TABLE IF EXISTS #tmpNumericColumns
 
@@ -76,25 +87,42 @@ AS
     WHERE C.COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames)
     GROUP BY C.COLUMN_NAME, ORDINAL_POSITION
 
+    -- debugging
     -- SELECT * FROM #tmpNumericColumns
 
     -- remove the numeric column names from the table that contains all columns to be translated
     DELETE FROM #tmpColNames WHERE COLUMN_NAME IN (SELECT COLUMN_NAME FROM #tmpNumericColumns)
 
-    -- SELECT * FROM #tmpColNames
+    -- check whether there are any categorical columns left
+    DECLARE @categoricalVariableCount INT
+    SELECT @categoricalVariableCount = COUNT(*) FROM #tmpColNames WHERE COLUMN_NAME != @pkColName
+
+    -- debugging
+    -- PRINT 'number categorical cols left:'
+    -- PRINT @categoricalVariableCount
+    
+    -- if there are no translatable variables for this table, just copy it over
+    IF @categoricalVariableCount = 0
+        BEGIN
+            EXEC('
+                DROP TABLE IF EXISTS ' + @DestinationTableSchema + '.' + @DestinationTableName + '
+                SELECT * INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' FROM ' + @SourceTableSchema + '.' + @SourceTableName + '
+                CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName)
+            
+            -- we can't do any of the steps below, so just return
+            RETURN
+        END
 
      -- create comma delimited list of the numeric columns, will be used later to retrieve them from the source table
     DECLARE @NumericSelectColNames varchar(MAX)
     SELECT 
         @NumericSelectColNames = STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION)
     FROM #tmpNumericColumns
-    
-    -- figure out which primary key column this table has    
-    DECLARE @pkColName varchar(MAX)
-    SELECT TOP 1 @pkColName = P.ColumnName FROM #tmpColNames C INNER JOIN #tmpPkColNames P ON C.COLUMN_NAME = P.ColumnName ORDER BY P.Priority
-
-    -- PRINT @pkColName
 
+    -- debugging
+    -- PRINT 'Numeric cols:'
+    -- PRINT @NumericSelectColNames
+    
     -- create comma delimited list of columns to be selected from the source table, including casting to varchar
     DECLARE @SourceSelectColNames varchar(MAX)
     SELECT 
@@ -104,6 +132,8 @@ AS
 
     SET @SourceSelectColNames = CAST('[' AS varchar(MAX)) + @pkColName + CAST('], ' AS varchar(MAX)) + @SourceSelectColNames
 
+    -- debugging
+    -- PRINT 'source select col names:' 
     -- PRINT @SourceSelectColNames
 
     -- create comma delimited list of columns to be unpivoted, excluding primary key column
@@ -113,6 +143,8 @@ AS
     WHERE 
         COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames)
 
+    -- debugging
+    -- PRINT 'unpivot col names:'
     -- PRINT @UnpivotColNames
 
     -- assemble dynamic SQL to unpivot the original table
@@ -131,6 +163,7 @@ AS
             )
         ) AS unpvt'
 
+    -- debugging
     -- PRINT @unpivotStmt
     EXEC ('DROP TABLE IF EXISTS ' + @UnpivotTempTableName)
     EXEC(@unpivotStmt)
@@ -152,32 +185,67 @@ AS
                 AND CAST(T.Response AS VARCHAR) = CAST(V.CodeOrValue AS VARCHAR)
                 AND V.TableName = ''' + @SourceTableName + '''        
     '
+    
+    -- debugging
     -- PRINT @TranslateStmt
     EXEC (@TranslateStmt)
 
     -- assemble SQL to pivot the translated table back into the original schema
     DECLARE @PivotStmt varchar(MAX)
-    SET @PivotStmt = '
-        WITH PivotTable AS(
-            SELECT * 
-            FROM (
-                SELECT 
-                    ' + @pkColName + ', 
-                    Variable, 
-                    ValueDescription 
-                FROM ' + @TranslatedTempTableName + '
-            ) AS SourceTable
-            PIVOT (
-                MAX(ValueDescription)
-                FOR Variable IN (
-                        ' + @UnpivotColNames + '
-                    ) 
-            ) AS PivotTable
-        )
-        SELECT P.*, ' + @NumericSelectColNames + ' INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' 
-        FROM PivotTable P INNER JOIN ' + @SourceTableSchema + '.' + @SourceTableName + ' S ON S.[' + @pkColName + '] = P.[' + @pkColName + ']
-    '
 
+    -- if there are numeric columns that need to be merged with the translated categotical columns
+    IF @NumericSelectColNames IS NOT NULL
+        BEGIN
+            -- build the query to include a join against the source table
+            SET @PivotStmt = '
+                WITH PivotTable AS(
+                    SELECT * 
+                    FROM (
+                        SELECT 
+                            ' + @pkColName + ', 
+                            Variable, 
+                            ValueDescription 
+                        FROM ' + @TranslatedTempTableName + '
+                    ) AS SourceTable
+                    PIVOT (
+                        MAX(ValueDescription)
+                        FOR Variable IN (
+                                ' + @UnpivotColNames + '
+                            ) 
+                    ) AS PivotTable
+                )
+                SELECT P.*, ' + @NumericSelectColNames + ' INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' 
+                FROM PivotTable P INNER JOIN ' + @SourceTableSchema + '.' + @SourceTableName + ' S ON S.[' + @pkColName + '] = P.[' + @pkColName + ']
+            '
+        END
+    ELSE
+        BEGIN
+            -- since we have no columns to merge from the original source table, just pivot the 
+            -- translated variables and insert into the destination table
+            SET @PivotStmt = '
+                WITH PivotTable AS(
+                    SELECT * 
+                    FROM (
+                        SELECT 
+                            ' + @pkColName + ', 
+                            Variable, 
+                            ValueDescription 
+                        FROM ' + @TranslatedTempTableName + '
+                    ) AS SourceTable
+                    PIVOT (
+                        MAX(ValueDescription)
+                        FOR Variable IN (
+                                ' + @UnpivotColNames + '
+                            ) 
+                    ) AS PivotTable
+                )
+                SELECT P.* INTO ' + @DestinationTableSchema + '.' + @DestinationTableName + ' 
+                FROM PivotTable P
+            '
+        END
+
+    -- debugging
+    -- PRINT 'Pivot statement:'
     -- PRINT @PivotStmt
     EXEC(@PivotStmt) 
 
@@ -195,6 +263,7 @@ AS
     DECLARE @IndexStmt varchar(8000)
     SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName
 
+    -- debugging
     -- EXEC('SELECT TOP 5 * FROM ' + @DestinationTableSchema + '.' + @DestinationTableName + ' ORDER BY SEQN')
     -- EXEC('SELECT TOP 5 * FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN')
 

From 30edb3202645a0e5f49cc987226fffe7704b5e71 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan_palmer@hms.harvard.edu>
Date: Wed, 28 Jun 2023 16:40:26 -0400
Subject: [PATCH 13/21] update environmnet variables to prepare for v0.0.4
 release

---
 Container/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Container/Dockerfile b/Container/Dockerfile
index 4be165e..8f02fef 100644
--- a/Container/Dockerfile
+++ b/Container/Dockerfile
@@ -354,8 +354,8 @@ RUN runuser -m -p  mssql -c '/opt/mssql/bin/sqlservr &' \
 WORKDIR /NHANES
 
 # Declare which version of the container this is, and make it available inside the container 
-ENV EPICONDUCTOR_CONTAINER_VERSION v0.0.3
-ENV EPICONDUCTOR_COLLECTION_DATE 2023-06-13
+ENV EPICONDUCTOR_CONTAINER_VERSION v0.0.4
+ENV EPICONDUCTOR_COLLECTION_DATE 2023-06-28
 
 RUN echo "EPICONDUCTOR_CONTAINER_VERSION=$EPICONDUCTOR_CONTAINER_VERSION" >> /usr/local/lib/R/etc/Renviron
 RUN echo "EPICONDUCTOR_COLLECTION_DATE=$EPICONDUCTOR_COLLECTION_DATE" >> /usr/local/lib/R/etc/Renviron

From 17014e6e1a3a78743f759d7b17fae09963d40207 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 17:10:56 -0400
Subject: [PATCH 14/21] add SHRINKFILE and CHECKPOINT to the end of the script
 that orchestrates table translation

---
 Code/R/translateRawTables.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Code/R/translateRawTables.R b/Code/R/translateRawTables.R
index 4403233..226ee1f 100644
--- a/Code/R/translateRawTables.R
+++ b/Code/R/translateRawTables.R
@@ -45,3 +45,8 @@ for (i in 1:nrow(tableList)) {
     SqlTools::dbSendUpdate(cn, stmt)
 }
 
+# shrink transaction log
+SqlTools::dbSendUpdate(cn, "DBCC SHRINKFILE(NhanesLandingZone_log)")
+
+# issue checkpoint
+SqlTools::dbSendUpdate(cn, "CHECKPOINT")

From d66b0c6840d215c372dac918c49822c5d3502ef0 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 17:12:27 -0400
Subject: [PATCH 15/21] fix clustered columnstore index names on both raw and
 translated tables

---
 Code/R/download.R             | 2 +-
 Code/Sql/spTranslateTable.sql | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Code/R/download.R b/Code/R/download.R
index bc7ce38..b27c4b1 100644
--- a/Code/R/download.R
+++ b/Code/R/download.R
@@ -343,7 +343,7 @@ for (i in i:length(dataTypes)) {
       SqlTools::dbSendUpdate(cn, insertStatement)
       
       indexStatement = paste(sep="",
-        "CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON Raw.", currDataType)
+        "CREATE CLUSTERED COLUMNSTORE INDEX ccix ON Raw.", currDataType)
                               
       SqlTools::dbSendUpdate(cn, indexStatement)
 
diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 189b40a..3ed6439 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -261,7 +261,7 @@ AS
 
     -- create a clustered index on the destination table with compression
     DECLARE @IndexStmt varchar(8000)
-    SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX idxSeqn ON ' + @DestinationTableSchema + '.' + @DestinationTableName
+    SET @IndexStmt = 'CREATE CLUSTERED COLUMNSTORE INDEX ccix ON ' + @DestinationTableSchema + '.' + @DestinationTableName
 
     -- debugging
     -- EXEC('SELECT TOP 5 * FROM ' + @DestinationTableSchema + '.' + @DestinationTableName + ' ORDER BY SEQN')

From 2b57ed3c71589f868f8a89c8c97b37a530d58c63 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 17:33:02 -0400
Subject: [PATCH 16/21] updated example debug script

---
 Code/Sql/RunTableTranslateOnSingleTable.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Code/Sql/RunTableTranslateOnSingleTable.sql b/Code/Sql/RunTableTranslateOnSingleTable.sql
index f02e280..abc9a07 100644
--- a/Code/Sql/RunTableTranslateOnSingleTable.sql
+++ b/Code/Sql/RunTableTranslateOnSingleTable.sql
@@ -9,7 +9,7 @@ DECLARE @DestinationTableName varchar(128)
 -- TODO: Set parameter values here.
 -- CAFE_G
 SET @SourceTableSchema  = 'Raw'
-SET @SourceTableName  = 'VARLK_C'
+SET @SourceTableName  = 'POOLTF_G'
 SET @DestinationTableSchema  = 'Translated'
 
 EXECUTE @RC = [dbo].[spTranslateTable] 
@@ -18,7 +18,7 @@ EXECUTE @RC = [dbo].[spTranslateTable]
   ,@DestinationTableSchema
   ,@DestinationTableName = @SourceTableName
 
-EXEC ('SELECT TOP 10 * FROM ' + @DestinationTableSchema + '.' + @SourceTableName ) --+ ' ORDER BY SEQN')
-EXEC ('SELECT TOP 10 * FROM ' + @SourceTableSchema + '.' + @SourceTableName ) -- + ' ORDER BY SEQN')
+EXEC ('SELECT TOP 10 * FROM ' + @DestinationTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN')
+EXEC ('SELECT TOP 10 * FROM ' + @SourceTableSchema + '.' + @SourceTableName + ' ORDER BY SEQN')
 
--- SELECT * FROM Metadata.VariableCodebook V WHERE V.TableName='AUX_J' AND (Variable='AUQ011' OR Variable='AUQ540')
\ No newline at end of file
+-- SELECT * FROM Metadata.VariableCodebook V WHERE V.TableName='POOLTF_G'
\ No newline at end of file

From fe1465dd5928652565510fb8ac431d507952ad7f Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 17:34:10 -0400
Subject: [PATCH 17/21] fix bug when processing crosswalk tables that map SEQN
 to SAMPLEID

---
 Code/Sql/spTranslateTable.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 3ed6439..2818364 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -84,7 +84,7 @@ AS
             V.TableName = @SourceTableName
             AND C.COLUMN_NAME = V.Variable
             AND V.ValueDescription = 'Range of Values'
-    WHERE C.COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames)
+    WHERE C.COLUMN_NAME != @pkColName
     GROUP BY C.COLUMN_NAME, ORDINAL_POSITION
 
     -- debugging

From 19c285292d55110c837016e9415ecaca827a3c9f Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 20:22:07 -0400
Subject: [PATCH 18/21] removed DROP PROC... GO from
 Code/Sql/spTranslateTable.sql

---
 Code/Sql/spTranslateTable.sql | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 2818364..2a5f6cf 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -1,8 +1,6 @@
 /*
     Stored procedure to translate variable responses in an NHANES questionnaire table
 */
-DROP PROC IF EXISTS spTranslateTable
-GO
 
 CREATE PROC spTranslateTable 
     @SourceTableSchema varchar(MAX),

From 6b4232222fa0463afe96bafd0b5ee130eff121f7 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 20:31:23 -0400
Subject: [PATCH 19/21] Exclude only the PK column from the list of columns to
 unpivot. This wasn't causeing any issues, since the FKs are numeric and
 wouldn't end up in the unpivot list, but it was technically not correct.

---
 Code/Sql/spTranslateTable.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Code/Sql/spTranslateTable.sql b/Code/Sql/spTranslateTable.sql
index 2a5f6cf..69a8e0e 100644
--- a/Code/Sql/spTranslateTable.sql
+++ b/Code/Sql/spTranslateTable.sql
@@ -139,7 +139,7 @@ AS
     SELECT @UnpivotColNames=STRING_AGG(CAST('[' + COLUMN_NAME + ']' AS varchar(MAX)), ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION)
     FROM #tmpColNames
     WHERE 
-        COLUMN_NAME NOT IN (SELECT ColumnName FROM #tmpPkColNames)
+        COLUMN_NAME != @pkColName
 
     -- debugging
     -- PRINT 'unpivot col names:'

From 5c12b62256c3159f384321c24fc0202973157e26 Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 21:05:02 -0400
Subject: [PATCH 20/21] Remove ReleaseNotes_v0_0_3.txt.  If we want release
 notes, they should all be in one file, cronologically.

---
 ReleaseNotes_v0_0_3.txt | 30 ------------------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 ReleaseNotes_v0_0_3.txt

diff --git a/ReleaseNotes_v0_0_3.txt b/ReleaseNotes_v0_0_3.txt
deleted file mode 100644
index b53ca8f..0000000
--- a/ReleaseNotes_v0_0_3.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-#################################################
-#       Version 0.0.3 Release Notes             #
-#################################################
-
-#The following tables/columns are expected in the db build 
-
-[NhanesLandingZone].[Metadata].[DownloadErrors] : DataType, FileUrl, Error
-[NhanesLandingZone].[Metadata].[QuestionnaireDescriptions] : Description, DataGroup, TableName
-[NhanesLandingZone].[Metadata].[QuestionnaireVariables] : Variable, TableName, Description, Target, SasLabel
-[NhanesLandingZone].[Metadata].[VariableCodebook] : Variable, TableName, CodeOrValue, ValueDescription, Count, Cumulative, SkipToItem
-
-[NhanesLandingZone].[Ontology].[dbxrefs] : Subject, Object, Ontology
-[NhanesLandingZone].[Ontology].[edges] : Subject, Object, Ontology
-[NhanesLandingZone].[Ontology].[entailed_edges] : Subject, Object, Ontology
-[NhanesLandingZone].[Ontology].[labels] : Subject, Object, IRI, Ontology, Direct, Inherited
-[NhanesLandingZone].[Ontology].[nhanes_variables_mappings] : Variable, TableName, SourceTermID, SourceTerm, MappedTempLabel, MappedTermCURIE, MappedTermIRI, MappingScore, Tags, Ontology
-
-#Sample Queries, and expected outputs:
-
-Query: SELECT * FROM [NhanesLandingZone].[Metadata].[QuestionnaireVariables]
-Expected Output: Around 42,000 rows, NULLS expected for Variable='SEQN' in 'Target' column
-
-Query: 
-Expected Output: 
-
-Query: 
-Expected Output: 
-
-Query: 
-Expected Output: 
\ No newline at end of file

From de059f75036ba20f1807ad791215fe9682cfecbf Mon Sep 17 00:00:00 2001
From: Nathan Palmer <nathan.palmer@gmail.com>
Date: Wed, 28 Jun 2023 22:19:15 -0400
Subject: [PATCH 21/21] change reference to version number in preparation for
 0.0.4 release

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index fb966db..2f110c5 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ docker \
         -e 'CONTAINER_USER_PASSWORD=PASSWORD' \
         -e 'ACCEPT_EULA=Y' \
         -e 'SA_PASSWORD=yourStrong(!)Password' \
-        hmsccb/nhanes-workbench:latest
+        hmsccb/nhanes-workbench:hmsccb/nhanes-workbench:version-0.0.4
 ```
 
 ### Parameters