Skip to content

Commit

Permalink
Merge pull request #245 from UoM-Data-Science-Platforms/Update-LH003-…
Browse files Browse the repository at this point in the history
…scripts-Mai

Update lh003 scripts
  • Loading branch information
maiparkes58 authored Nov 11, 2024
2 parents 58c5386 + 7c0b7aa commit 5cdc62e
Show file tree
Hide file tree
Showing 15 changed files with 323 additions and 155 deletions.
17 changes: 16 additions & 1 deletion projects/SDE Lighthouse 03 - Kontopantelis/README.html
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,22 @@ <h2>Methodology</h2>
The RDE has access to a library of reusable SQL queries for common tasks, and sets of clinical codes for different phenotypes, built up from previous studies.
Prior to data extraction, the code is checked and signed off by another RDE.</p>
<h2>Reusable queries</h2>
<p>This project did not require any reusable queries from the local library <a href="https://github.com/rw251/gm-idcr/tree/master/shared/Reusable%20queries%20for%20data%20extraction">https://github.com/rw251/gm-idcr/tree/master/shared/Reusable queries for data extraction</a>.## Clinical code sets</p>
<p>This project required the following reusable queries:</p>
<ul>
<li>Create table of patients who were alive at the study start date</li>
</ul>
<p>Further details for each query can be found below.</p>
<h3>Create table of patients who were alive at the study start date</h3>
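<p>Builds a temporary table of adult patients who were alive at the study start date. Patients who have opted out of sharing GP data do not appear in the final table.</p>
<p><em>Input</em></p>
<pre><code>Requires the StudyStartDate and StudyEndDate variables to be set.
Takes one parameter:
  - minimum-age: integer - The minimum age of the group of patients. Typically this would be 0 (all patients) or 18 (all adults)
</code></pre>
<p><em>Output</em></p>
<pre><code>A temp table named AlivePatientsAtStart containing the columns of the latest demographics snapshot, plus:
  - DeathDate
  - leftGMDate
</code></pre>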
<p><em>File</em>: <code>query-get-possible-patients.sql</code></p>
<p><em>Link</em>: <a href="https://github.com/rw251/gm-idcr/tree/master/shared/Reusable%20queries%20for%20data%20extraction/query-get-possible-patients.sql">https://github.com/rw251/.../query-get-possible-patients.sql</a></p>
<h2>Clinical code sets</h2>
<p>This project required the following clinical code sets:</p>
<ul>
<li>delirium v1</li>
Expand Down
23 changes: 22 additions & 1 deletion projects/SDE Lighthouse 03 - Kontopantelis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,28 @@ Prior to data extraction, the code is checked and signed off by another RDE.

## Reusable queries

This project did not require any reusable queries from the local library [https://github.com/rw251/gm-idcr/tree/master/shared/Reusable queries for data extraction](https://github.com/rw251/gm-idcr/tree/master/shared/Reusable%20queries%20for%20data%20extraction).## Clinical code sets
This project required the following reusable queries:

- Create table of patients who were alive at the study start date

Further details for each query can be found below.

### Create table of patients who were alive at the study start date
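Builds a temporary table of adult patients who were alive at the study start date. Patients who have opted out of sharing GP data do not appear in the final table.

_Input_
```
Requires the StudyStartDate and StudyEndDate variables to be set.
Takes one parameter:
  - minimum-age: integer - The minimum age of the group of patients. Typically this would be 0 (all patients) or 18 (all adults)
```

_Output_
```
A temp table named AlivePatientsAtStart containing the columns of the latest demographics snapshot, plus:
  - DeathDate
  - leftGMDate
```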
_File_: `query-get-possible-patients.sql`

_Link_: [https://github.com/rw251/.../query-get-possible-patients.sql](https://github.com/rw251/gm-idcr/tree/master/shared/Reusable%20queries%20for%20data%20extraction/query-get-possible-patients.sql)
## Clinical code sets

This project required the following clinical code sets:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,112 @@ USE SCHEMA SDE_REPOSITORY.SHARED_UTILITIES;
-- - Sex
-- - YearOfBirth
-- - Ethnicity
-- - YearAndMonthOfDeath
-- - EthnicityCategory
-- - IMD2019Decile1IsMostDeprived10IsLeastDeprived
-- - FirstDementiaDate
-- - DeathYearAndMonth

-- NB1 PI did not request date of dementia diagnosis, but it seems likely
-- that they will need it, so including as well.

-- NB2 Date of death was requested in a separate file, but it is included here
-- for brevity, and because it has a one-to-one relationship with patient.

-- Study window, held in session variables and read further down as
-- $StudyStartDate and $StudyEndDate.
SET StudyStartDate = TO_DATE('2006-01-01');
SET StudyEndDate = TO_DATE('2024-06-30');


--┌─────────────────────────────────────────────────────────────────┐
--│ Create table of patients who were alive at the study start date │
--└─────────────────────────────────────────────────────────────────┘

-- ** any patients opted out of sharing GP data would not appear in the final table

-- this script requires StudyStartDate and StudyEndDate to be set before it runs

-- takes one parameter:
-- minimum-age : integer - The minimum age of the group of patients. Typically this would be 0 (all patients) or 18 (all adults)

-- ALL DEATHS
-- Every row of the PCMD deaths table, with the diagnosis recorded as original
-- mention number 1 attached where one exists.

DROP TABLE IF EXISTS Death;
CREATE TEMPORARY TABLE Death AS
SELECT
    pcmd."GmPseudo",
    TO_DATE(pcmd."RegisteredDateOfDeath") AS DeathDate,
    mention."DiagnosisOriginalMentionCode",
    mention."DiagnosisOriginalMentionDesc",
    mention."DiagnosisOriginalMentionChapterCode",
    mention."DiagnosisOriginalMentionChapterDesc",
    mention."DiagnosisOriginalMentionCategory1Code",
    mention."DiagnosisOriginalMentionCategory1Desc"
FROM PRESENTATION.NATIONAL_FLOWS_PCMD."DS1804_Pcmd" AS pcmd
LEFT JOIN PRESENTATION.NATIONAL_FLOWS_PCMD."DS1804_PcmdDiagnosisOriginalMentions" AS mention
    ON mention."XSeqNo" = pcmd."XSeqNo"
    -- filter sits in ON rather than WHERE so deaths without a first mention are still returned
    AND mention."DiagnosisOriginalMentionNumber" = 1;

-- GET LATEST SNAPSHOT OF DEMOGRAPHICS TABLE
-- For each patient aged 18 or over on $StudyStartDate, keeps the demographics
-- rows belonging to that patient's most recent "Snapshot".

DROP TABLE IF EXISTS LatestSnapshot;
CREATE TEMPORARY TABLE LatestSnapshot AS
SELECT
    demo.*
FROM PRESENTATION.GP_RECORD."DemographicsProtectedCharacteristics_SecondaryUses" AS demo
INNER JOIN (
    SELECT
        "GmPseudo",
        MAX("Snapshot") AS LatestSnapshot
    FROM PRESENTATION.GP_RECORD."DemographicsProtectedCharacteristics_SecondaryUses"
    -- adults only: the 18th birthday must fall on or before the study start date.
    -- DATEDIFF(YEAR, ...) is deliberately avoided here because it only counts
    -- calendar-year boundaries, so it would also admit patients who are still 17
    -- on the study start date (e.g. born late 1988 for a 2006-01-01 start).
    WHERE DATEADD(YEAR, 18, TO_DATE("DateOfBirth")) <= $StudyStartDate
    GROUP BY "GmPseudo"
) AS latest
    ON latest."GmPseudo" = demo."GmPseudo"
    AND latest.LatestSnapshot = demo."Snapshot";

-- PATIENT SUMMARY
-- Per patient: earliest and latest demographics snapshot, plus the date of death
-- where one is recorded. Used to work out which patients have left GM and
-- therefore had their data feed stop.

DROP TABLE IF EXISTS PatientSummary;
CREATE TEMPORARY TABLE PatientSummary AS
SELECT
    demo."GmPseudo",
    MIN(demo."Snapshot") AS "min",
    MAX(demo."Snapshot") AS "max",
    MAX(d.DeathDate) AS DeathDate
FROM PRESENTATION.GP_RECORD."DemographicsProtectedCharacteristics_SecondaryUses" AS demo
LEFT JOIN Death AS d
    ON d."GmPseudo" = demo."GmPseudo"
GROUP BY demo."GmPseudo";

-- DATE THE PATIENT LEFT GM
-- A patient with no recorded death whose last snapshot is older than the newest
-- snapshot seen for any patient is treated as having left GM on that last
-- snapshot date; everyone else gets NULL.

DROP TABLE IF EXISTS leftGMDate;
CREATE TEMPORARY TABLE leftGMDate AS
WITH latest_feed AS (
    -- newest snapshot date across all patients
    SELECT MAX("max") AS latest_snapshot
    FROM PatientSummary
)
SELECT
    ps."GmPseudo",
    ps."min",
    ps."max",
    ps.DeathDate,
    CASE
        WHEN ps.DeathDate IS NULL AND ps."max" < lf.latest_snapshot THEN ps."max"
    END AS "leftGMDate"
FROM PatientSummary AS ps
CROSS JOIN latest_feed AS lf;

-- ADULT PATIENTS ALIVE AT STUDY START DATE
-- Starts from LatestSnapshot and adds each patient's date of death and the date
-- they left GM (either may be NULL).

DROP TABLE IF EXISTS AlivePatientsAtStart;
CREATE TEMPORARY TABLE AlivePatientsAtStart AS
SELECT
    snap.*,
    d.DeathDate AS "DeathDate",
    gone."leftGMDate"
FROM LatestSnapshot AS snap
LEFT JOIN Death AS d
    ON d."GmPseudo" = snap."GmPseudo"
LEFT JOIN leftGMDate AS gone
    ON gone."GmPseudo" = snap."GmPseudo"
WHERE
    -- alive on study start date: no recorded death, or death after that date
    (d.DeathDate IS NULL OR d.DeathDate > $StudyStartDate)
    -- if the patient left GM (so we stop receiving their data), it must be after the study end date
    AND (gone."leftGMDate" IS NULL OR gone."leftGMDate" > $StudyEndDate);


-- Study cohort: one row per ("GmPseudo", "FK_Patient_ID") pair with the earliest
-- recorded dementia diagnosis date. Restricted to register rows with "Age" >= 18
-- and to patients present in AlivePatientsAtStart.
DROP TABLE IF EXISTS SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis";
CREATE TABLE SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis" (
    "GmPseudo" NUMBER(38,0),
    "FK_Patient_ID" NUMBER(38,0),
    "FirstDementiaDate" DATE
) AS
SELECT
    "GmPseudo",
    "FK_Patient_ID",
    MIN("Dementia_DiagnosisDate") AS FirstDementiaDate -- stored under the quoted name given in the column list above
-- Single source table: the statement previously carried two FROM clauses
-- (PRESENTATION and INTERMEDIATE), which cannot be parsed.
FROM INTERMEDIATE.GP_RECORD."LongTermConditionRegister_SecondaryUses"
WHERE "Dementia_DiagnosisDate" IS NOT NULL
    AND "Age" >= 18
    AND "FK_Patient_ID" IN (SELECT "FK_Patient_ID" FROM AlivePatientsAtStart)
GROUP BY "GmPseudo", "FK_Patient_ID";


Expand All @@ -47,13 +135,11 @@ SELECT
"EthnicityLatest_Category" AS "EthnicityCategory",
"IMD_Decile" AS "IMD2019Decile1IsMostDeprived10IsLeastDeprived",
"FirstDementiaDate",
CAST("RegisteredDateOfDeath" AS DATE) AS "RegisteredDateOfDeath"
DATE_TRUNC(month, alive."DeathDate") AS "DeathYearAndMonth"
FROM SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis" cohort
LEFT OUTER JOIN PRESENTATION.GP_RECORD."DemographicsProtectedCharacteristics_SecondaryUses" demo
ON demo."GmPseudo" = cohort."GmPseudo"
LEFT OUTER JOIN PRESENTATION.NATIONAL_FLOWS_PCMD."DS1804_Pcmd" mortality
ON mortality."GmPseudo" = cohort."GmPseudo"
QUALIFY row_number() OVER (PARTITION BY demo."GmPseudo" ORDER BY "Snapshot" DESC) = 1;
LEFT OUTER JOIN AlivePatientsAtStart alive
ON alive."GmPseudo" = cohort."GmPseudo"
QUALIFY row_number() OVER (PARTITION BY alive."GmPseudo" ORDER BY "Snapshot" DESC) = 1;

-- Then we check to see if there are any new GmPseudo ids. We do this by making a temp table
-- of all "new" GmPseudo ids. I.e. any GmPseudo ids that we've already got a unique id for
Expand Down Expand Up @@ -84,5 +170,6 @@ FROM "AllPseudos_SDE_Lighthouse_03_Kontopantelis";
-- created in the 0.code-sets.sql file
DROP TABLE IF EXISTS SDE_REPOSITORY.SHARED_UTILITIES."LH003-1_Patients";
CREATE TABLE SDE_REPOSITORY.SHARED_UTILITIES."LH003-1_Patients" AS
SELECT
    -- "PatientID" is the result of the study-specific gm_pseudo_hash_... function
    -- applied to "GmPseudo"; the raw "GmPseudo" column is left out of the output.
    SDE_REPOSITORY.SHARED_UTILITIES.gm_pseudo_hash_SDE_Lighthouse_03_Kontopantelis("GmPseudo") AS "PatientID",
    * EXCLUDE "GmPseudo"
FROM SDE_REPOSITORY.SHARED_UTILITIES."LH003-1_Patients_WITH_PSEUDO_IDS";
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ USE SCHEMA SDE_REPOSITORY.SHARED_UTILITIES;
-- From application:
-- Table 2: Lifestyle factors (from 2006 to present)
-- - PatientID
-- - TestName ( smoking status, BMI, alcohol consumption)
-- - TestDate
-- - TestResult
-- - TestUnit



-- ... processing [[create-output-table::"LH003-2a_Lifestyl_BMI"]] ...
Expand All @@ -25,7 +24,7 @@ SELECT
"GmPseudo",
"EventDate" AS "TestDate",
"BMI" AS "TestResult"
FROM INTERMEDIATE.GP_RECORD."Readings_BMI"
FROM INTERMEDIATE.GP_RECORD."Readings_BMI_SecondaryUses"
WHERE "GmPseudo" IN (SELECT "GmPseudo" FROM SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis")
AND YEAR("EventDate") >= 2006;

Expand Down Expand Up @@ -58,5 +57,6 @@ FROM "AllPseudos_SDE_Lighthouse_03_Kontopantelis";
-- created in the 0.code-sets.sql file
DROP TABLE IF EXISTS SDE_REPOSITORY.SHARED_UTILITIES."LH003-2a_Lifestyl_BMI";
CREATE TABLE SDE_REPOSITORY.SHARED_UTILITIES."LH003-2a_Lifestyl_BMI" AS
SELECT
    -- "PatientID" is the result of the study-specific gm_pseudo_hash_... function
    -- applied to "GmPseudo"; the raw "GmPseudo" column is left out of the output.
    SDE_REPOSITORY.SHARED_UTILITIES.gm_pseudo_hash_SDE_Lighthouse_03_Kontopantelis("GmPseudo") AS "PatientID",
    * EXCLUDE "GmPseudo"
FROM SDE_REPOSITORY.SHARED_UTILITIES."LH003-2a_Lifestyl_BMI_WITH_PSEUDO_IDS";
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ USE SCHEMA SDE_REPOSITORY.SHARED_UTILITIES;
-- From application:
-- Table 2: Lifestyle factors (from 2006 to present)
-- - PatientID
-- - TestName ( smoking status, BMI, alcohol consumption)
-- - TestName ( Alcohol, Smoking)
-- - TestDate
-- - Description
-- - TestResult
-- - TestUnit
-- - TestUnits
-- - Status
-- - Consumption

-- NB1 - I'm only restricting BMI values to 2006 to present.
-- NB2 - The PI confirmed that instead of raw values of when statuses were
Expand All @@ -35,7 +38,7 @@ SELECT
"Units" AS "TestUnits",
"AlcoholStatus" AS "Status",
"AlcoholConsumption" AS "Consumption"
FROM INTERMEDIATE.GP_RECORD."Readings_Alcohol"
FROM INTERMEDIATE.GP_RECORD."Readings_Alcohol_SecondaryUses"
WHERE "GmPseudo" IN (SELECT "GmPseudo" FROM SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis")
UNION
SELECT
Expand All @@ -50,7 +53,7 @@ SELECT
WHEN "SmokingConsumption_Date" = "SmokingStatus_Date" THEN "SmokingConsumption"
ELSE NULL
END -- "Consumption"
FROM INTERMEDIATE.GP_RECORD."Readings_Smoking"
FROM INTERMEDIATE.GP_RECORD."Readings_Smoking_SecondaryUses"
WHERE "GmPseudo" IN (SELECT "GmPseudo" FROM SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis");

-- Then we check to see if there are any new GmPseudo ids. We do this by making a temp table
Expand Down Expand Up @@ -82,5 +85,6 @@ FROM "AllPseudos_SDE_Lighthouse_03_Kontopantelis";
-- created in the 0.code-sets.sql file
DROP TABLE IF EXISTS SDE_REPOSITORY.SHARED_UTILITIES."LH003-2b_Lifestyle_Alcohol_Smoking";
CREATE TABLE SDE_REPOSITORY.SHARED_UTILITIES."LH003-2b_Lifestyle_Alcohol_Smoking" AS
SELECT
    -- "PatientID" is the result of the study-specific gm_pseudo_hash_... function
    -- applied to "GmPseudo"; the raw "GmPseudo" column is left out of the output.
    SDE_REPOSITORY.SHARED_UTILITIES.gm_pseudo_hash_SDE_Lighthouse_03_Kontopantelis("GmPseudo") AS "PatientID",
    * EXCLUDE "GmPseudo"
FROM SDE_REPOSITORY.SHARED_UTILITIES."LH003-2b_Lifestyle_Alcohol_Smoking_WITH_PSEUDO_IDS";
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ USE SCHEMA SDE_REPOSITORY.SHARED_UTILITIES;
-- From application:
-- Table 3: Comorbidities (using full date range available)
-- - PatientID
-- - Condition
-- - FirstDate
-- - LatestDate
-- - ConditionOccurences (number of times appeared)
-- - All available comorbidity dates' columns

-- NB1 - just using all the existing comorbidity data in the GP_Record schema.
-- NB2 - this is not the format initially requested, but likely what the team
Expand Down Expand Up @@ -40,10 +37,8 @@ SELECT
"NonDiabeticHyperglycemia_DiagnosisDate", "Obesity_DiagnosisDate", "Osteoporosis_DiagnosisDate", "PainfulCondition_DiagnosisDate",
"PalliativeCare_DiagnosisDate", "ParkinsonsDisease_DiagnosisDate", "PepticUlcerDisease_DiagnosisDate",
"PeripheralArterialDisease_DiagnosisDate", "ProstateDisorder_DiagnosisDate", "Psoriasis_DiagnosisDate",
"RheumatoidArthritis_DiagnosisDate", "Stroke_DiagnosisDate", "ThyroidDisorder_DiagnosisDate", "TIA_DiagnosisDate",
"FirstLTC", "FirstLTC_DiagnosisDate", "SecondLTC", "SecondLTC_DiagnosisDate", "ThirdLTC",
"ThirdLTC_DiagnosisDate", "FourthLTC", "FourthLTC_DiagnosisDate", "FifthLTC", "FifthLTC_DiagnosisDate"
FROM INTERMEDIATE.GP_RECORD."LongTermConditionRegister_Diagnosis"
"RheumatoidArthritis_DiagnosisDate", "Stroke_DiagnosisDate", "ThyroidDisorder_DiagnosisDate", "TIA_DiagnosisDate"
FROM INTERMEDIATE.GP_RECORD."LongTermConditionRegister_SecondaryUses"
WHERE "GmPseudo" IN (SELECT "GmPseudo" FROM SDE_REPOSITORY.SHARED_UTILITIES."Cohort_SDE_Lighthouse_03_Kontopantelis")
QUALIFY row_number() OVER (PARTITION BY "GmPseudo" ORDER BY "Snapshot" DESC) = 1;

Expand Down Expand Up @@ -76,5 +71,6 @@ FROM "AllPseudos_SDE_Lighthouse_03_Kontopantelis";
-- created in the 0.code-sets.sql file
DROP TABLE IF EXISTS SDE_REPOSITORY.SHARED_UTILITIES."LH003-3_Comorbidities";
CREATE TABLE SDE_REPOSITORY.SHARED_UTILITIES."LH003-3_Comorbidities" AS
SELECT
    -- "PatientID" is the result of the study-specific gm_pseudo_hash_... function
    -- applied to "GmPseudo"; the raw "GmPseudo" column is left out of the output.
    SDE_REPOSITORY.SHARED_UTILITIES.gm_pseudo_hash_SDE_Lighthouse_03_Kontopantelis("GmPseudo") AS "PatientID",
    * EXCLUDE "GmPseudo"
FROM SDE_REPOSITORY.SHARED_UTILITIES."LH003-3_Comorbidities_WITH_PSEUDO_IDS"; -- this brings back the values from the most recent snapshot
Loading

0 comments on commit 5cdc62e

Please sign in to comment.