-
Notifications
You must be signed in to change notification settings - Fork 0
/
create-VARIANZ-dataset.sas
1052 lines (852 loc) · 41.7 KB
/
create-VARIANZ-dataset.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
*Created by Sheree Gibb and Jinfeng Zhao in June 2016;
*This code takes the VARIANZ population file and adds demographic, geographic and health variables;
libname dlab "\\wprdfs08\Datalab-MA\MAA2016-11 VIEW-IDI";
libname dlab2 "\\wprdfs08\Datalab-MA\MAA2016-11 VIEW-IDI\UOA_DATA";
libname moh ODBC dsn=idi_clean_20160224_srvprd schema=moh_clean;
libname cen ODBC dsn=idi_clean_20160224_srvprd schema=cen_clean;
libname data ODBC dsn=idi_clean_20160224_srvprd schema=data;
libname sand ODBC dsn=idi_sandpit_srvprd schema="DL-MAA2016-11";
libname sandir ODBC dsn=idi_sandpit_srvprd schema=clean_read_IR_restrict;
libname metadata ODBC dsn=IDI_Metadata_srvprd schema=clean_read_CLASSIFICATIONS;
libname dia ODBC dsn=idi_clean_20160224_srvprd schema=dia_clean;
********************************************************
********************************************************
* DEMOGRAPHIC VARIABLES *
********************************************************
********************************************************
*Resident status for each year of followup;
%macro overseasdays(year);
proc sql;
create table overseas_spells_&year. as
select unique snz_uid, pos_applied_date, pos_ceased_date, pos_day_span_nbr
from data.person_overseas_spell
where pos_applied_date < "31DEC&year.:00:00:00.000"dt and pos_ceased_date >= "01JAN&year.:00:00:00.000"dt and snz_uid in (select unique snz_uid from dlab.varianz_pop)
order by snz_uid, pos_applied_date;
quit;
***calculate time spent overseas in each year for each spell;
data overseas_spells_&year.;
set overseas_spells_&year.;
if pos_ceased_date >= "31DEC&year.:00:00:00.000"dt and pos_applied_date < "01JAN&year.:00:00:00.000"dt
then time_overseas_&year. = 365;
else if pos_ceased_date >= "31DEC&year.:00:00:00.000"dt
then time_overseas_&year. = ("31DEC&year.:00:00:00.000"dt - pos_applied_date) / 86400;
else if pos_ceased_date < "31DEC&year.:00:00:00.000"dt and pos_applied_date >= "01JAN&year.:00:00:00.000"dt
then time_overseas_&year. = pos_day_span_nbr;
else if pos_ceased_date <= "31DEC&year.:00:00:00.000"dt and pos_applied_date < "01JAN&year.:00:00:00.000"dt
then time_overseas_&year. = (pos_ceased_date - "01JAN&year.:00:00:00.000"dt) / 86400;
run;
***sum days spent overseas per year for each person ***;
proc sql;
create table overseas_spells_&year._sum as select snz_uid, ROUND(SUM(time_overseas_&year.), 1) as days_overseas_&year.
from overseas_spells_&year.
group by snz_uid;
quit;
***replace missing with 0 days overseas***
*create flags for residence in each year;
data resident_&year.;
set overseas_spells_&year._sum;
if days_overseas_&year. =. then days_overseas_&year. = 0;
if days_overseas_&year.>183 then resident_&year.=0;
else resident_&year.=1;
keep snz_uid resident_&year.;
run;
%mend;
%overseasdays(2013);
%overseasdays(2014);
%overseasdays(2015);
*Merge files from all years;
proc sql;
create table resident_final as
select *
from resident_2013 as a full join resident_2014 as b on a.snz_uid=b.snz_uid
full join resident_2015 as c on b.snz_uid=c.snz_uid
order by resident_2013 descending, resident_2014 descending, resident_2015 descending;
quit;
*Transfer resident flags to VARIANZ population file;
proc sql;
create table varianz_resident as
select a.*, b.resident_2013, b.resident_2014, b.resident_2015
from dlab.varianz_pop as a left join resident_final as b on a.snz_uid=b.snz_uid;
quit;
*AGE and SEX are already on the population file;
**** ETHNICITY ****;
*Get ethnicity from census if available, otherwise from health, otherwise from personal detail table;
*Extract ethnicity info from census;
proc sql;
create table ethnicity_cen as
select unique a.*, input(b.cen_ind_maori_eth_ind_code, 2.0) as cen_maori, input(b.cen_ind_pac_islnd_eth_ind_code, 2.0) as cen_pacific,
input(b.cen_ind_asian_eth_ind_code, 2.0) as cen_asian, input(b.cen_ind_melaa_eth_ind_code, 2.0) as cen_melaa,
input(b.cen_ind_otr_eth_ind_code, 2.0) as cen_othereth, input(b.cen_ind_eur_eth_ind_code, 2.0) as cen_euro,
b.cen_ind_eth_rand6_grp1_code as cen_ethgrp1, b.cen_ind_eth_rand6_grp2_code as cen_ethgrp2,
b.cen_ind_eth_rand6_grp3_code as cen_ethgrp3, b.cen_ind_eth_rand6_grp4_code as cen_ethgrp4,
b.cen_ind_eth_rand6_grp5_code as cen_ethgrp5, b.cen_ind_eth_rand6_grp6_code as cen_ethgrp6
from varianz_resident as a left join cen.census_individual as b on a.snz_uid=b.snz_uid;
quit;
*Extract ethnicity from health;
proc sql;
create table ethnicity_moh as
select a.*, b.moh_pop_ethnicity_1_code as moh_eth1, b.moh_pop_ethnicity_2_code as moh_eth2, b.moh_pop_ethnicity_3_code as moh_eth3,
b.moh_pop_ethnic_grp1_snz_ind as moh_euro, b.moh_pop_ethnic_grp2_snz_ind as moh_maori, b.moh_pop_ethnic_grp3_snz_ind as moh_pacific,
b.moh_pop_ethnic_grp4_snz_ind as moh_asian, b.moh_pop_ethnic_grp5_snz_ind as moh_melaa, b.moh_pop_ethnic_grp6_snz_ind as moh_other
from ethnicity_cen as a left join moh.pop_cohort_demographics as b on a.snz_uid=b.snz_uid;
quit;
*Extract ethnicity from personal detail;
proc sql;
create table ethnicity_pd as
select a.*, b.snz_ethnicity_grp1_nbr as pd_euro, b.snz_ethnicity_grp2_nbr as pd_maori, b.snz_ethnicity_grp3_nbr as pd_pacific,
b.snz_ethnicity_grp4_nbr as pd_asian, b.snz_ethnicity_grp5_nbr as pd_melaa, b.snz_ethnicity_grp6_nbr as pd_other
from ethnicity_moh as a left join data.personal_detail as b on a.snz_uid=b.snz_uid;
quit;
**Recode ethnicity variables, create flags for Indian and Chinese, create prioritised ethnicity**;
data eth;
set ethnicity_pd;
*Recode census ethnicities;
if (cen_maori=2) then cen_maori = 1;
else if cen_maori>2 then cen_maori=.;
if (cen_pacific=2) then cen_pacific = 1;
else if cen_pacific>2 then cen_pacific=.;
if (cen_asian=2) then cen_asian = 1;
else if cen_asian>2 then cen_asian=.;
if (cen_melaa=2) then cen_melaa = 1;
else if cen_melaa>2 then cen_melaa=.;
if (cen_euro=2) then cen_euro = 1;
else if cen_euro>2 then cen_euro=.;
if (cen_othereth=2) then cen_othereth = 1;
else if cen_othereth>2 then cen_othereth=.;
*Create flags for Indian and Chinese ethncities from census;
if cen_ethgrp1="" then cen_chinese=.;
else if (cen_ethgrp1 >= "42000" and cen_ethgrp1 < "43000") then cen_chinese = 1;
else if (cen_ethgrp2 >= "42000" and cen_ethgrp2 < "43000") then cen_chinese = 1;
else if (cen_ethgrp3 >= "42000" and cen_ethgrp3 < "43000") then cen_chinese = 1;
else if (cen_ethgrp4 >= "42000" and cen_ethgrp4 < "43000") then cen_chinese = 1;
else if (cen_ethgrp5 >= "42000" and cen_ethgrp5 < "43000") then cen_chinese = 1;
else if (cen_ethgrp6 >= "42000" and cen_ethgrp6 < "43000") then cen_chinese = 1;
else cen_chinese = 0;
if cen_ethgrp1="" then cen_indian=.;
else if (cen_ethgrp1 >= "43000" and cen_ethgrp1 < "44000") then cen_indian = 1;
else if (cen_ethgrp2 >= "43000" and cen_ethgrp2 < "44000") then cen_indian = 1;
else if (cen_ethgrp3 >= "43000" and cen_ethgrp3 < "44000") then cen_indian = 1;
else if (cen_ethgrp4 >= "43000" and cen_ethgrp4 < "44000") then cen_indian = 1;
else if (cen_ethgrp5 >= "43000" and cen_ethgrp5 < "44000") then cen_indian = 1;
else if (cen_ethgrp6 >= "43000" and cen_ethgrp6 < "44000") then cen_indian = 1;
else cen_indian = 0;
*Create flags for Indian and Chinese ethnicity from MoH;
if (moh_eth1='' and moh_eth2='' and moh_eth3='') then moh_chinese=.;
else if (moh_eth1='42' or moh_eth2='42' or moh_eth3='42') then moh_chinese=1;
else moh_chinese=0;
if (moh_eth1='' and moh_eth2='' and moh_eth3='') then moh_indian=.;
else if (moh_eth1='43' or moh_eth2='43' or moh_eth3='43') then moh_indian=1;
else moh_indian=0;
*Create final ethnicities by using census if available, if not use MoH, if not use personal detail (Indian and Chinese not available from personal detail);
if cen_maori ne . then do;
maori_eth=cen_maori;
pacific_eth=cen_pacific;
asian_eth=cen_asian;
euro_eth=cen_euro;
melaa_eth=cen_melaa;
other_eth=cen_othereth;
indian_eth=cen_indian;
chinese_eth=cen_chinese;
end;
else if cen_maori=. and moh_maori ne . then do;
maori_eth=moh_maori;
pacific_eth=moh_pacific;
asian_eth=moh_asian;
euro_eth=moh_euro;
melaa_eth=moh_melaa;
other_eth=moh_other;
indian_eth=moh_indian;
chinese_eth=moh_chinese;
end;
else if cen_maori=. and moh_maori=. then do;
maori_eth=pd_maori;
pacific_eth=pd_pacific;
asian_eth=pd_asian;
euro_eth=pd_euro;
melaa_eth=pd_melaa;
other_eth=pd_other;
indian_eth=.;
chinese_eth=.;
end;
*Create prioritised ethnicity from final ethnicities;
format pri_eth $12.;
if maori_eth=. then pri_eth="";
else if maori_eth = 1 then pri_eth = "Maori";
else if pacific_eth = 1 then pri_eth = "Pacific";
else if (asian_eth = 1 and chinese_eth = 1) then pri_eth = "Chinese";
else if (asian_eth = 1 and indian_eth = 1) then pri_eth = "Indian";
else if asian_eth = 1 then pri_eth = "Other Asian";
else pri_eth = "Other";
**Assign pacific indian to "Indian" for pri_eth **;
if (maori_eth = 0 and pacific_eth= 1 and asian_eth= 1 and indian_eth= 1) then pri_eth = "Indian";
drop cen_ethgrp1 cen_ethgrp2 cen_ethgrp3 cen_ethgrp4 cen_ethgrp5 cen_ethgrp6 moh_eth1 moh_eth2 moh_eth3
cen_asian cen_euro cen_maori cen_melaa cen_othereth cen_pacific cen_indian cen_chinese
moh_asian moh_euro moh_maori moh_melaa moh_other moh_pacific moh_indian moh_chinese
pd_asian pd_euro pd_maori pd_melaa pd_other pd_pacific;
run;
********************************************************
********************************************************
* GEOGRAPHIC VARIABLES *
********************************************************
********************************************************;
*Get latest address from address notification table;
*Using SQL mgmt server (SAS is too slow) I have already created a subset of address notification table
with "MSDP" type addresses removed, and updates after 31 Dec 2012 removed;
*Subset is called "address_subset" in the sandpit;
*Link the address subset with the IDIERP;
proc sql;
create table addr2012 as
select distinct a.*, b.ant_address_source_code as addr_source, b.ant_meshblock_code as MB13, input(b.ant_notification_date,anydtdte10.) as addr_date_updated format ddmmyy10.
from eth as a left join sand.address_subset as b on a.snz_uid=b.snz_uid
order by snz_uid, addr_date_updated descending;
quit;
**select the most recently updated addresses**;
data final_address;
set addr2012;
by snz_uid;
if first.snz_uid;
*create a numeric version of mb13 for merging with concordance files;
mb13_num=input(mb13, 10.0);
run;
/**Check how many addresses come from each source, and how many are missing;*/
/*proc freq data=final_address;*/
/*tables addr_source;*/
/*run;*/
*Merge with concordances to get DHB and NZDEP;
*There are about 70000 people with meshblocks that don't match NZDEP or DHB- are they old meshblocks? Or newer ones?;
proc sql;
create table addr_nzdep as
select a.*, b.depindex2013 as nzdep2013, b.depscore2013
from final_address as a left join metadata.depindex2013 as b on a.mb13=b.meshblock2013;
quit;
proc sql;
create table addr_dhb as
select a.*, b.dhb_code, b.dhb_label, b.regc2013_code as region, b.regc2013_label as region_name, b.ta2013_code as TA13, b.ta2013_label as ta_name,
put(b.mb2006_code,z7.) as mb2006_code
from addr_nzdep as a left join dlab2.areas_file_2013 as b on a.mb13_num=b.mb2013_code;
quit;
************** *Check that there are not many-to-one matches here;
proc sql;
create table addr_nzdep06 as
select a.*, b.depindex2006 as nzdep2006, b.depscore2006
from addr_dhb as a left join metadata.depindex2006 as b on a.mb2006_code=b.meshblock2006
order by nzdep2013 descending;
quit;
********************************************************************
********************************************************************
* SMOKING, INCOME, OCCUPATION AND QUALS *
********************************************************************
********************************************************************;
*Smoking, income, occupation and quals from census;
proc sql;
create table varianz_cen as
select a.*, b.cen_ind_std_highest_qual_code as cen_highest_qual, b.cen_ind_occupation2006_code as cen_occ_anzsco2006, b.cen_ind_smoke_regular_code as cen_smoke_regular, b.cen_ind_smoke_ever_code as cen_smoke_ever,
b.cen_ind_ttl_inc_code
from addr_nzdep06 as a left join cen.census_individual as b on a.snz_uid=b.snz_uid;
quit;
*Income from Inland Revenue;
proc sql;
create table ir_income as
select a.snz_uid, b.inc_cal_yr_tot_yr_amt
from varianz_cen as a left join sandir.ems_cal_yr_summary as b
on a.snz_uid=b.snz_uid and b.ir_inc_year_nbr = 2012;
quit;
*Sum across different income types to get total annual income for each person;
proc summary nway data=ir_income;
class snz_uid;
var inc_cal_yr_tot_yr_amt;
output out=ir_income_sum (drop=_type_ _freq_) sum=;
run;
*Merge total annual incomes back into varianz file;
proc sql;
create table varianz_income as
select *
from varianz_cen as a left join ir_income_sum as b on a.snz_uid=b.snz_uid;
quit;
*Recode census bands and group IR income into census bands;
*Recode occupation residual categories to missing;
data varianz_income_final;
set varianz_income;
format cen_income $18.;
if cen_ind_ttl_inc_code = "11" then cen_income = "Loss";
else if cen_ind_ttl_inc_code = "12" then cen_income = "Zero income";
else if cen_ind_ttl_inc_code = "13" then cen_income = "$1-$5,000";
else if cen_ind_ttl_inc_code = "14" then cen_income = "$5,001-$10,000";
else if cen_ind_ttl_inc_code = "15" then cen_income = "$10,001-$15,000";
else if cen_ind_ttl_inc_code = "16" then cen_income = "$15,001-$20,000";
else if cen_ind_ttl_inc_code = "17" then cen_income = "$20,001-$25,000";
else if cen_ind_ttl_inc_code = "18" then cen_income = "$25,001-$30,000";
else if cen_ind_ttl_inc_code = "19" then cen_income = "$30,001-$35,000";
else if cen_ind_ttl_inc_code = "20" then cen_income = "$35,001-$40,000";
else if cen_ind_ttl_inc_code = "21" then cen_income = "$40,001-$50,000";
else if cen_ind_ttl_inc_code = "22" then cen_income = "$50,001-$60,000";
else if cen_ind_ttl_inc_code = "23" then cen_income = "$60,001-$70,000";
else if cen_ind_ttl_inc_code = "24" then cen_income = "$70,001-$100,000";
else if cen_ind_ttl_inc_code = "25" then cen_income = "$100,001-$150,000";
else if cen_ind_ttl_inc_code = "26" then cen_income = "$150,001 or More";
else if cen_ind_ttl_inc_code = "99" then cen_income = "Not stated";
format income $18.;
if cen_income ne '' then income=cen_income;
else if cen_income='' and inc_cal_yr_tot_yr_amt ne . then do;
if inc_cal_yr_tot_yr_amt<0 then income = "Loss";
else if inc_cal_yr_tot_yr_amt=0 then income = "Zero income";
else if (inc_cal_yr_tot_yr_amt >=1 and inc_cal_yr_tot_yr_amt <= 5000) then income = "$1-$5,000";
else if (inc_cal_yr_tot_yr_amt >=5001 and inc_cal_yr_tot_yr_amt <= 10000) then income = "$5,001-$10,000";
else if (inc_cal_yr_tot_yr_amt >=10001 and inc_cal_yr_tot_yr_amt <= 15000) then income = "$10,001-$15,000";
else if (inc_cal_yr_tot_yr_amt >=15001 and inc_cal_yr_tot_yr_amt <= 20000) then income = "$15,001-$20,000";
else if (inc_cal_yr_tot_yr_amt >=20001 and inc_cal_yr_tot_yr_amt <= 25000) then income = "$20,001-$25,000";
else if (inc_cal_yr_tot_yr_amt >=25001 and inc_cal_yr_tot_yr_amt <= 30000) then income = "$25,001-$30,000";
else if (inc_cal_yr_tot_yr_amt >=30001 and inc_cal_yr_tot_yr_amt <= 35000) then income = "$30,001-$35,000";
else if (inc_cal_yr_tot_yr_amt >=35001 and inc_cal_yr_tot_yr_amt <= 40000) then income = "$35,001-$40,000";
else if (inc_cal_yr_tot_yr_amt >=40001 and inc_cal_yr_tot_yr_amt <= 50000) then income = "$40,001-$50,000";
else if (inc_cal_yr_tot_yr_amt >=50001 and inc_cal_yr_tot_yr_amt <= 60000) then income = "$50,001-$60,000";
else if (inc_cal_yr_tot_yr_amt >=60001 and inc_cal_yr_tot_yr_amt <= 70000) then income = "$60,001-$70,000";
else if (inc_cal_yr_tot_yr_amt >=70001 and inc_cal_yr_tot_yr_amt <= 100000) then income = "$70,001-$100,000";
else if (inc_cal_yr_tot_yr_amt >=100001 and inc_cal_yr_tot_yr_amt <= 150000) then income = "$100,001-$150,000";
else if (inc_cal_yr_tot_yr_amt >=150001) then income = "$150,001 or More";
end;
else if cen_income='' and inc_cal_yr_tot_yr_amt=. then income='';
*Add a flag for income source;
if cen_income ne '' then income_source='CENSUS';
else if (cen_income='' and inc_cal_yr_tot_yr_amt ne .) then income_source='IRD';
else income_source='';
*Code missing 2006 meshblocks correctly;
if mb2006_code<'0000000' then mb2006_code='';
*Recode census occupation residual categories (99----) to missing;
if cen_occ_anzsco2006 in('997000' '999000' '999999') then cen_occ_anzsco2006='';
*Recode census highest quals residual categories (9-) to missing;
if cen_highest_qual in('94' '95' '96' '97' '98' '99') then cen_highest_qual='';
*Recode census smoking residual categories (7 and 9) to missing, and recode '2' to '0';
if cen_smoke_ever in('7' '9') then cen_smoke_ever='';
if cen_smoke_ever='2' then cen_smoke_ever='0';
if cen_smoke_regular in('7' '9') then cen_smoke_regular='';
if cen_smoke_regular='2' then cen_smoke_regular='0';
drop cen_ind_ttl_inc_code inc_cal_yr_tot_yr_amt cen_income mb13_num;
run;
********************************************************
********************************************************
* DATE OF LAST HEALTH CONTACT *
********************************************************
********************************************************;
*Get date of last health contact from NNPAC, NMDS, or PHO;
*Get the last consultation date from any of those sources prior to 31 Dec 2012;
*PHO;
proc sql;
create table lc_pho as
select a.snz_uid, b.moh_pho_last_consul_date as date, 'PHO' as flag length 6
from dlab.varianz_pop as a left join moh.pho_enrolment as b
on a.snz_uid=b.snz_uid and moh_pho_last_consul_date <= '2012-12-31'
order by snz_uid, moh_pho_last_consul_date descending;
quit;
*Delete everything except the most recent visit;
data lc_pho2;
set lc_pho;
if snz_uid ne lag(snz_uid) then first=1;
else first=0;
if first ne 1 then delete;
drop first;
run;
*NMDS;
proc sql;
create table lc_nmds as
select a.snz_uid, b.moh_evt_even_date as date, 'NMDS' as flag length 6
from dlab.varianz_pop as a left join moh.pub_fund_hosp_discharges_event as b
on a.snz_uid=b.snz_uid and b.moh_evt_even_date<='2012-12-31'
order by snz_uid, moh_evt_even_date descending;
quit;
*Delete everything except the most recent visit;
data lc_nmds2;
set lc_nmds;
if snz_uid ne lag(snz_uid) then first=1;
else first=0;
if first ne 1 then delete;
drop first;
run;
*NNPAC;
proc sql;
create table lc_nnpac as
select a.snz_uid, b.moh_nnp_service_date as date, 'NNPAC' as flag length 6
from dlab.varianz_pop as a left join moh.nnpac as b
on a.snz_uid=b.snz_uid and moh_nnp_service_date <= '2012-12-31'
order by snz_uid, moh_nnp_service_date descending;
quit;
*Delete everything except the most recent visit;
data lc_nnpac2;
set lc_nnpac;
if snz_uid ne lag(snz_uid) then first=1;
else first=0;
if first ne 1 then delete;
drop first;
run;
*Combine dates from PHO, NMDS and NNPAC;
*Can replace this with SQL so it doesn't need to be sorted?;
data all_dates;
set lc_pho2 lc_nmds2 lc_nnpac2;
run;
proc sort data=all_dates;
by snz_uid descending date;
run;
*Delete everything except the most recent visit;
data final_dates;
set all_dates;
if snz_uid ne lag(snz_uid) then first=1;
else first=0;
if first ne 1 then delete;
drop first;
if date='' then flag='';
run;
*Merge latest dates back into VARIANZ dataset;
proc sql;
create table dlab.varianz_date_lc as
select a.*, input(b.date,anydtdte10.) as last_contact_date format ddmmyy10., b.flag as last_contact_flag
from varianz_income_final as a left join final_dates as b on a.snz_uid=b.snz_uid;
quit;
********************************************************
********************************************************
* PHARMACEUTICAL DISPENSING *
********************************************************
********************************************************;
*Extract all prescriptions from 1 July 2012 (6 mths prior to reference date, for baseline dispensing)
to end of 2014 (most recent dates available) for people in VARIANZ population;
proc sql;
create table varianz_pharms as
select a.*, b.moh_pha_dispensed_date, input(b.moh_pha_dim_form_pack_code, 8.) as moh_pha_dim_form_pack_code
from dlab.varianz_pop as a left join moh.pharmaceutical as b
on a.snz_uid=b.snz_uid and b.moh_pha_dispensed_date>='2012-07-01';
quit;
*Apply pharm group codes to all prescriptions;
proc sql;
create table varianz_pharm_flags as
select a.*, b.Aspirin_low_dose, b.Aspirin_any_dose, b.Clopidogrel, b.Dipyridamole, b.Prasugrel, b.Ticagrelor, b.Ticlopidine, b.Warfarin, b.Other_anticoagulant, b.ACEI,
b.ARB, b.Thiazide, b.Beta_blocker, b.Calcium_channel_blocker, b.Alphablocker, b.Spironolactone, b.Other_AHT, b.Statin, b.Other_lipid_lowering, b.Loop_diuretic,
b.Metolazone, b.Nitrate, b.Other_antianginal, b.Nonaspirin_NSAIDS, b.Corticosteroids, b.Proton_pump_inhibitors, b.H2_blockers, b.H_pylori_eradication,
b.Diabetes, b.SSRI
from varianz_pharms as a left join dlab2.dim_form_pack_subsidy_10Jun16 as b
on a.moh_pha_dim_form_pack_code=b.dim_form_pack_subsidy_key ;
quit;
*Create flags for dispensing at least once in each six-month period from 1 July 2012 to 31 Dec 2014;
%macro pharms6mth(startdate, enddate, period);
*Summarise flags per person for 6-month period;
proc summary nway data=varianz_pharm_flags;
class snz_uid;
var Aspirin_low_dose Aspirin_any_dose Clopidogrel Dipyridamole Prasugrel Ticagrelor Ticlopidine Warfarin Other_anticoagulant ACEI
ARB Thiazide Beta_blocker Calcium_channel_blocker Alphablocker Spironolactone Other_AHT Statin Other_lipid_lowering Loop_diuretic
Metolazone Nitrate Other_antianginal Nonaspirin_NSAIDS Corticosteroids Proton_pump_inhibitors H2_blockers H_pylori_eradication
Diabetes SSRI;
where moh_pha_dispensed_date>="&startdate." and moh_pha_dispensed_date<="&enddate.";
output out=pharm_flag_&period (drop=_type_ _freq_) max=;
run;
data pharm_flag_&period._recode;
set pharm_flag_&period.;
*Rename all vars with time period identifier;
rename aspirin_low_dose=&period._aspirin_low_dose;
rename aspirin_any_dose=&period._aspirin_any_dose;
rename clopidogrel=&period._clopidogrel;
rename dipyridamole=&period._dipyridamole;
rename prasugrel=&period._prasugrel;
rename ticagrelor=&period._ticagrelor;
rename ticlopidine=&period._ticlopidine ;
rename warfarin=&period._warfarin;
rename other_anticoagulant=&period._other_anticoag;
rename ACEI=&period._ACEI;
rename ARB=&period._ARB;
rename thiazide=&period._thiazide;
rename beta_blocker=&period._beta_blocker;
rename calcium_channel_blocker=&period._calcium_channel;
rename alphablocker=&period._alphablocker;
rename spironolactone=&period._spironolactone;
rename other_AHT=&period._other_AHT;
rename statin=&period._statin;
rename other_lipid_lowering=&period._other_lipid_lower;
rename loop_diuretic=&period._loop_diuretic;
rename metolazone=&period._metolazone;
rename nitrate=&period._nitrate;
rename other_antianginal=&period._other_antianginal;
rename nonaspirin_NSAIDS=&period._nonaspirin_NSAIDS;
rename corticosteroids=&period._corticosteroids;
rename proton_pump_inhibitors=&period._proton_pump;
rename H2_blockers=&period._H2_blockers;
rename H_pylori_eradication=&period._H_pylori_erad;
rename Diabetes=&period._diabetes;
rename SSRI=&period._SSRI;
run;
%mend pharms6mth;
%pharms6mth(2012-07-01,2012-12-31,base);
%pharms6mth(2013-01-01,2013-06-30,post_2013a);
%pharms6mth(2013-07-01,2013-12-31,post_2013b);
%pharms6mth(2014-01-01,2014-06-30,post_2014a);
%pharms6mth(2014-07-01,2014-12-31,post_2014b);
*Put all 6 month periods together;
data all_post_pharms;
merge pharm_flag_base_recode pharm_flag_post_2013a_recode pharm_flag_post_2013b_recode pharm_flag_post_2014a_recode pharm_flag_post_2014b_recode;
by snz_uid;
*Recode all missing pharms to '0';
%macro recode2(period);
*Recode missing to zero for all pharms flags;
if &period._aspirin_low_dose=. then &period._aspirin_low_dose=0;
if &period._aspirin_any_dose=. then &period._aspirin_any_dose=0;
if &period._clopidogrel=. then &period._clopidogrel=0;
if &period._dipyridamole=. then &period._dipyridamole=0;
if &period._prasugrel=. then &period._prasugrel=0;
if &period._ticagrelor=. then &period._ticagrelor=0;
if &period._ticlopidine=. then &period._ticlopidine =0;
if &period._warfarin=. then &period._warfarin=0;
if &period._other_anticoag=. then &period._other_anticoag=0;
if &period._ACEI=. then &period._ACEI=0;
if &period._ARB=. then &period._ARB=0;
if &period._thiazide=. then &period._thiazide=0;
if &period._beta_blocker=. then &period._beta_blocker=0;
if &period._calcium_channel=. then &period._calcium_channel=0;
if &period._alphablocker=. then &period._alphablocker=0;
if &period._spironolactone=. then &period._spironolactone=0;
if &period._other_AHT=. then &period._other_AHT=0;
if &period._statin=. then &period._statin=0;
if &period._other_lipid_lower=. then &period._other_lipid_lower=0;
if &period._loop_diuretic=. then &period._loop_diuretic=0;
if &period._metolazone=. then &period._metolazone=0;
if &period._nitrate=. then &period._nitrate=0;
if &period._other_antianginal=. then &period._other_antianginal=0;
if &period._nonaspirin_NSAIDS=. then &period._nonaspirin_NSAIDS=0;
if &period._corticosteroids=. then &period._corticosteroids=0;
if &period._proton_pump=. then &period._proton_pump=0;
if &period._H2_blockers=. then &period._H2_blockers=0;
if &period._H_pylori_erad=. then &period._H_pylori_erad=0;
if &period._Diabetes=. then &period._diabetes=0;
if &period._SSRI=. then &period._SSRI=0;
%mend recode2;
%recode2(base);
%recode2(post_2013a);
%recode2(post_2013b);
%recode2(post_2014a);
%recode2(post_2014b);
run;
*Merge flags back into main VARIANZ dataset;
proc sql;
create table dlab.varianz_pharms as
select *
from dlab.varianz_date_lc as a left join all_post_pharms as b on a.snz_uid=b.snz_uid;
quit;
*************************************************
*************************************************
***** CVD conditions *****
*************************************************
*************************************************;
*Extract dispensings of antianginals in the 5 years prior to reference date;
proc sql;
create table angina_pharms as
select a.*, input(b.moh_pha_dispensed_date,anydtdte10.) as moh_pha_dispensed_date format ddmmyy10., b.moh_pha_dim_form_pack_code, 1 as angina_pharm_flag
from dlab.varianz_pop as a left join moh.pharmaceutical as b
on a.snz_uid=b.snz_uid and b.moh_pha_dispensed_date>='2008-01-01' and moh_pha_dim_form_pack_code in('55914' '55915' '55916' '55917' '55918' '55919'
'55920' '55922' '55923' '55924' '55925' '55927' '55928' '58523' '58524' '59906' '59907' '60864' '60865' '60866' '60867' '60868' '62201' '62202' '62203' '62204' '62399' '62400'
'62401' '63723' '63724' '63725' '64022' '64023' '64930' '64931' '64932' '65054' '65055' '65056' '65400' '65401' '65402' '65403' '65404' '65405' '66281' '66282' '66283' '66284'
'66285' '67219' '67220' '67221' '67222' '67223' '67224' '67225' '67546' '67547' '67548' '67549' '67550' '67551' '67860' '67861' '67862' '67863' '69364' '69365' '69366' '69367'
'69718' '69719' '70005' '70006' '70007' '70008' '70009' '70010' '70216' '70217' '70218' '70219' '70220' '70221' '70222' '70223' '70224' '70225' '70226' '70227' '70228' '70229'
'70230' '70335' '70336' '70337' '70740' '70741' '70742' '70743' '70744' '70745' '70746' '72738' '72739' '72758' '72759' '72760' '73367' '73368' '73604' '73813' '75331' '75332'
'75431' '77846' '78080' '78153' '78391' '78725' '78776' '78828' '80596' '80616' '80977' '80978' '81699' '55879' '55921' '55929' '55930' '61774' '61775' '65587' '65588' '65589'
'74111' '79338' '79339' '79408' '79409');
quit;
*Sum to get total number of dispensings per person in the 5 year period prior to reference date, and date of first dispensing;
proc summary nway data=angina_pharms (where=(moh_pha_dim_form_pack_code ne '' and moh_pha_dispensed_date<='31DEC2012'd));
class snz_uid;
var angina_pharm_flag moh_pha_dispensed_date;
output out=angina_pharms_count (drop=_type_ to_drop) min=to_drop first_date;
run;
*Flag people with 3 or more dispensings. Date of 'onset' is the date of the first dispensing in the 5 year period;
data angina_flag;
set angina_pharms_count;
if _freq_ ge 3 then hist_angina_pharms=1;
else hist_angina_pharms=0;
format date_first_angina_pharms ddmmyy10.;
if _freq_ ge 3 then date_first_angina_pharms=first_date;
if _freq_<3 then delete;
keep snz_uid hist_angina_pharms date_first_angina_pharms;
run;
*Select all CVD diagnoses from diagnosis table;
proc sql;
create table cvd_diags as
select distinct moh_dia_event_id_nbr, moh_dia_clinical_code
from moh.pub_fund_hosp_discharges_diag as b
where moh_dia_clinical_code in(select distinct clinical_code from dlab2.latest_cvd_subgrp_longfm);
quit;
*Get event information for those diagnoses;
proc sql;
create table cvd_events as
select distinct a.*, b.snz_uid, input(b.moh_evt_evst_date,anydtdte10.) as moh_evt_evst_date format ddmmyy10., b.moh_evt_even_date
from cvd_diags as a left join moh.pub_fund_hosp_discharges_event as b
on a.moh_dia_event_id_nbr=b.moh_evt_event_id_nbr;
quit;
*Restrict to CVD events for varianz cohort;
proc sql;
create table cvd_events_varianz as
select distinct a.snz_uid, b.moh_evt_evst_date, b.moh_evt_even_date, b.moh_dia_event_id_nbr, b.moh_dia_clinical_code
from sand.varianz_pop as a left join cvd_events as b
on a.snz_uid=b.snz_uid;
quit;
*Get subgroup flags for those events;
proc sql;
create table cvd_subgroups_varianz as
select *
from cvd_events_varianz as a left join dlab2.latest_cvd_subgrp_longfm as b
on a.moh_dia_clinical_code=b.clinical_code;
quit;
*Create flags and dates of first hospitalisation prior to reference date for people with CVD events;
*Macro for creating flag for each CVD subgroup;
%macro subgrp(subgroup,shortname);
*Sort by subgroup and date, oldest first;
proc sort data=cvd_subgroups_varianz;
by snz_uid &subgroup. moh_evt_evst_date;
run;
data &shortname._hist;
set cvd_subgroups_varianz (where=(&subgroup.=1 and moh_evt_evst_date<='31DEC2012'd));
last_uid=lag(snz_uid);
format date_first_&shortname. ddmmyy10.;
if snz_uid ne last_uid then date_first_&shortname.=moh_evt_evst_date;
hist_&shortname.=1;
if snz_uid=last_uid then delete;
keep snz_uid hist_&shortname. date_first_&shortname.;
run;
%mend subgrp;
%subgrp(subgroup_acs,ACS);
%subgrp(subgroup_CHD,CHD);
%subgrp(subgroup_MI,MI);
%subgrp(subgroup_PVD,PVD);
%subgrp(subgroup_TIA,TIA);
%subgrp(subgroup_ischaemic_stroke,isch_str);
%subgrp(angina,angina);
%subgrp(heart_failure,HF);
%subgrp(diabetes,diabetes);
%subgrp(atrial_fibrillation,AF);
%subgrp(artherosclerotic_cvd,arth_CVD);
%subgrp(history_haemorrhagic_stroke,haem_str);
run;
*Flag for Diabetes from VDR;
proc sql;
create table diab_all as
select snz_uid, input(moh_chr_fir_incidnt_date,anydtdte10.) as moh_chr_fir_incidnt_date format ddmmyy10.
from moh.chronic_condition
where moh_chr_condition_text='DIA' and moh_chr_fir_incidnt_date<='2012-12-31'
order by snz_uid, moh_chr_fir_incidnt_date;
quit;
data diabetes;
set diab_all;
last_uid=lag(snz_uid);
format date_first_diabetes_vdr ddmmyy10.;
if snz_uid ne last_uid then date_first_diabetes_vdr=moh_chr_fir_incidnt_date;
hist_diabetes_vdr=1;
if snz_uid=last_uid then delete;
keep snz_uid hist_diabetes_vdr date_first_diabetes_vdr;
run;
*Add CVD flags and dates back into main dataset;
data dlab.varianz_cvd_hist;
merge dlab.varianz_pharms (in=a) arth_cvd_hist angina_hist haem_str_hist diabetes_hist diabetes angina_flag
acs_hist chd_hist mi_hist pvd_hist tia_hist isch_str_hist hf_hist af_hist;
by snz_uid;
if a;
*Create 'any cvd' variable;
*As per Suneela's email, CVD is defined as at least one incident of:
artherosclerotic CVD at any prior hospitalisation
or
angina at any prior hospitalisation
or
dispensing of 3x angina pharms in the last 5 years
or
haemorrhagic stroke at any prior hospitalisation;
hist_any_cvd=max(hist_arth_cvd,hist_angina,hist_angina_pharms,hist_haem_str);
format date_first_any_cvd ddmmyy10.;
date_first_any_cvd=min(date_first_angina,date_first_angina_pharms,date_first_arth_cvd,date_first_haem_str);
*Recode blanks to 0;
if hist_any_cvd=. then hist_any_cvd=0;
if hist_arth_cvd=. then hist_arth_cvd=0;
if hist_angina=. then hist_angina=0;
if hist_angina_pharms=. then hist_angina_pharms=0;
if hist_diabetes=. then hist_diabetes=0;
if hist_diabetes_vdr=. then hist_diabetes_vdr=0;
if hist_haem_str=. then hist_haem_str=0;
if hist_acs=. then hist_acs=0;
if hist_chd=. then hist_chd=0;
if hist_mi=. then hist_mi=0;
if hist_pvd=. then hist_pvd=0;
if hist_tia=. then hist_tia=0;
if hist_isch_str=. then hist_isch_str=0;
if hist_hf=. then hist_hf=0;
if hist_af=. then hist_af=0;
*Create var for people with antianginal pharms history ONLY;
if hist_angina_pharms=1 and max(hist_arth_cvd, hist_angina, hist_diabetes, hist_diabetes_vdr, hist_haem_str, hist_acs, hist_chd, hist_mi, hist_pvd,
hist_tia, hist_isch_str, hist_hf, hist_af)=0 then hist_angina_pharms_only=1;
else hist_angina_pharms_only=0;
run;
*Create flags and dates of first hospitalisation after reference date for people with CVD events;
*Extract dispensings of antianginals after the reference date;
proc sql;
create table angina_pharms_post as
select a.*, input(b.moh_pha_dispensed_date,anydtdte10.) as moh_pha_dispensed_date format ddmmyy10., b.moh_pha_dim_form_pack_code, 1 as angina_pharm_flag
from dlab.varianz_pop as a left join moh.pharmaceutical as b
on a.snz_uid=b.snz_uid and b.moh_pha_dispensed_date>='2013-01-01' and moh_pha_dim_form_pack_code in('55914' '55915' '55916' '55917' '55918' '55919'
'55920' '55922' '55923' '55924' '55925' '55927' '55928' '58523' '58524' '59906' '59907' '60864' '60865' '60866' '60867' '60868' '62201' '62202' '62203' '62204' '62399' '62400'
'62401' '63723' '63724' '63725' '64022' '64023' '64930' '64931' '64932' '65054' '65055' '65056' '65400' '65401' '65402' '65403' '65404' '65405' '66281' '66282' '66283' '66284'
'66285' '67219' '67220' '67221' '67222' '67223' '67224' '67225' '67546' '67547' '67548' '67549' '67550' '67551' '67860' '67861' '67862' '67863' '69364' '69365' '69366' '69367'
'69718' '69719' '70005' '70006' '70007' '70008' '70009' '70010' '70216' '70217' '70218' '70219' '70220' '70221' '70222' '70223' '70224' '70225' '70226' '70227' '70228' '70229'
'70230' '70335' '70336' '70337' '70740' '70741' '70742' '70743' '70744' '70745' '70746' '72738' '72739' '72758' '72759' '72760' '73367' '73368' '73604' '73813' '75331' '75332'
'75431' '77846' '78080' '78153' '78391' '78725' '78776' '78828' '80596' '80616' '80977' '80978' '81699' '55879' '55921' '55929' '55930' '61774' '61775' '65587' '65588' '65589'
'74111' '79338' '79339' '79408' '79409');
quit;
*Sum to get total number of dispensings per person following the reference date, and date of first dispensing;
proc summary nway data=angina_pharms_post (where=(moh_pha_dim_form_pack_code ne ''));
class snz_uid;
var angina_pharm_flag moh_pha_dispensed_date;
output out=angina_pharms_count_post (drop=_type_ to_drop) min=to_drop first_date;
run;
*Flag people with 3 or more dispensings. Date of 'onset' is the date of the first dispensing in the 5 year period;
data angina_flag_post;
set angina_pharms_count_post;
if _freq_ ge 3 then post_angina_pharms=1;
else post_angina_pharms=0;
format date_first_post_angina_pharms ddmmyy10.;
if _freq_ ge 3 then date_first_post_angina_pharms=first_date;
if _freq_<3 then delete;
keep snz_uid post_angina_pharms date_first_post_angina_pharms;
run;
*Create flags and dates of first hospitalisation after the reference date for people with CVD events;
*Macro for creating flag for each CVD subgroup;
%macro subgrp(subgroup,shortname);
*Sort by subgroup and date, oldest first;
proc sort data=cvd_subgroups_varianz;
by snz_uid &subgroup. moh_evt_evst_date;
run;
data &shortname._post;
set cvd_subgroups_varianz (where=(&subgroup.=1 and moh_evt_evst_date>='01JAN2013'd));
last_uid=lag(snz_uid);
format date_first_post_&shortname. ddmmyy10.;
if snz_uid ne last_uid then date_first_post_&shortname.=moh_evt_evst_date;
post_&shortname.=1;
if snz_uid=last_uid then delete;
keep snz_uid post_&shortname. date_first_post_&shortname.;
run;
%mend subgrp;
%subgrp(subgroup_acs,ACS);
%subgrp(subgroup_CHD,CHD);
%subgrp(subgroup_MI,MI);
%subgrp(subgroup_PVD,PVD);
%subgrp(subgroup_TIA,TIA);
%subgrp(subgroup_ischaemic_stroke,isch_str);
%subgrp(angina,angina);
%subgrp(heart_failure,HF);
%subgrp(diabetes,diabetes);
%subgrp(atrial_fibrillation,AF);
%subgrp(artherosclerotic_cvd,arth_CVD);
%subgrp(outcomes_haemorrhagic_stroke,haem_str);
run;
*Flag for Diabetes from VDR;
proc sql;
create table diab_all_post as
select snz_uid, input(moh_chr_fir_incidnt_date,anydtdte10.) as moh_chr_fir_incidnt_date format ddmmyy10.
from moh.chronic_condition
where moh_chr_condition_text='DIA' and moh_chr_fir_incidnt_date>='2013-01-01'
order by snz_uid, moh_chr_fir_incidnt_date;
quit;
data diabetes_vdr_post;
set diab_all_post;
last_uid=lag(snz_uid);
format date_first_post_diabetes_vdr ddmmyy10.;
if snz_uid ne last_uid then date_first_post_diabetes_vdr=moh_chr_fir_incidnt_date;
post_diabetes_vdr=1;
if snz_uid=last_uid then delete;
keep snz_uid post_diabetes_vdr date_first_post_diabetes_vdr;
run;
*Add CVD flags and dates back into main dataset;
data varianz_cvd_all;
merge dlab.varianz_cvd_hist (in=a) arth_cvd_post angina_post haem_str_post diabetes_post diabetes_vdr_post angina_flag_post
acs_post chd_post mi_post pvd_post tia_post isch_str_post hf_post af_post;
by snz_uid;
if a;
*Create 'any cvd' variable;
*As per Suneela's email, CVD is defined as at least one incident of:
artherosclerotic CVD at any prior hospitalisation
or
angina at any prior hospitalisation
or
dispensing of 3x angina pharms in the last 5 years
or
haemorrhagic stroke at any prior hospitalisation;
post_any_cvd=max(post_arth_cvd, post_angina, post_angina_pharms, post_haem_str);
format date_first_post_any_cvd ddmmyy10.;
date_first_post_any_cvd=min(date_first_post_angina, date_first_post_angina_pharms, date_first_post_arth_cvd, date_first_post_haem_str);
*Recode blanks to 0;
if post_any_cvd=. then post_any_cvd=0;
if post_arth_cvd=. then post_arth_cvd=0;
if post_angina=. then post_angina=0;
if post_angina_pharms=. then post_angina_pharms=0;
if post_diabetes=. then post_diabetes=0;
if post_diabetes_vdr=. then post_diabetes_vdr=0;
if post_haem_str=. then post_haem_str=0;
if post_acs=. then post_acs=0;
if post_chd=. then post_chd=0;
if post_mi=. then post_mi=0;
if post_pvd=. then post_pvd=0;
if post_tia=. then post_tia=0;
if post_isch_str=. then post_isch_str=0;
if post_hf=. then post_hf=0;
if post_af=. then post_af=0;
*Recode missing resident flags to 1 (if there were no overseas spells they will be missing);
if resident_2013=. then resident_2013=1;
if resident_2014=. then resident_2014=1;
if resident_2015=. then resident_2015=1;
**Calculate date of loss to followup;
* At present this is only loss to outmigration as we don't have updated death data. When we do will need to change to include date of death;
format date_lost_followup ddmmyy10.;
if resident_2013=0 then date_lost_followup='31DEC2013'd;
else if resident_2014=0 then date_lost_followup='31DEC2014'd;
else if resident_2015=0 then date_lost_followup='31DEC2015'd;
else date_lost_followup=.;
run;
*****************************************************
********* Dates of death- temporary measure *********
*****************************************************;
*Get dates of death from DIA data- no cause of death at this stage, and less reliable than MoH data, but it will do until we have more recent MoH data;
proc sql;
create table deaths as
select a.snz_uid, b.dia_dth_death_year_nbr, b.dia_dth_death_month_nbr
from dlab.varianz_pop as a left join dia.deaths as b on a.snz_uid=b.snz_uid
order by dia_dth_death_year_nbr descending;
quit;
*Reformat dates, impute day of death as last day of month (I didn't worry about leap years for now);
data death_dates;