layout | title | nav_order | nav_exclude | has_toc |
---|---|---|---|---|
default |
HbF GWAS |
2 |
false |
false |
Genome-wide association study of fetal hemoglobin (HbF) in Cameroon, Tanzania, and African American Sickle Cell Anemia patients
library(tidyverse)
cm <- read_table("../clinicaldata/cm_clinical_data.tsv")
tz <- read_table("../clinicaldata/tz_clinical_data.tsv")
cm %>% names()
## [1] "FID" "IID" "AGE"
## [4] "SEX" "HbF" "HbS"
## [7] "Hb_Phenotype" "Hb" "Phenotypes"
## [10] "Genotypes" "Haplotypes" "athal"
## [13] "Curated_Age_yrs" "VOC.year" "Consultation.year"
## [16] "Hosp" "Stroke" "HbA2_percentage"
## [19] "RBC" "MCV.fl." "MCHC"
## [22] "WBC" "Lymp" "Mono"
## [25] "Platelets" "Curated_penotype" "Sample_name_comment"
Quantitative/Continuous variables
variables <- c(
"AGE", # Age in years
"HbS", # Proportion of HbS
"HbF", # Proportion of fetal haemoglobin
"HbA2_percentage", # Proportion of HbA2
"Consultation.year", # Number of consultations per year
"MCHC", # Mean corpuscular haemoglobin concentration
"Mono", # Monocyte count (x10^9/l)
"Hosp", # Number of hospitalizations per year
"RBC", # Red blood cell count (x10^12/l)
"WBC", # White blood cell count (x10^9/l)
"Platelets", # Platelet count (x10^9/l)
"VOC.year", # Number of vaso occlusive crises per year
"MCV.fl.", # Mean cell volume
"Lymp" # Lymphocyte count (x10^9/l)
)
cm %>% select(variables) %>% summary()
## AGE HbS HbF HbA2_percentage
## Min. : 5.00 Min. :19.60 Min. : 0.700 Min. : 0.000
## 1st Qu.:10.00 1st Qu.:80.30 1st Qu.: 4.700 1st Qu.: 2.700
## Median :16.00 Median :86.00 Median : 8.500 Median : 3.200
## Mean :18.05 Mean :83.56 Mean : 9.892 Mean : 3.324
## 3rd Qu.:24.00 3rd Qu.:90.70 3rd Qu.:13.600 3rd Qu.: 3.800
## Max. :66.00 Max. :96.50 Max. :37.400 Max. :18.200
## NA's :3
## Consultation.year MCHC Mono Hosp
## Min. : 0.000 Min. : 3.80 Min. :0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.:25.90 1st Qu.:0.700 1st Qu.: 1.000
## Median : 1.000 Median :31.00 Median :1.000 Median : 1.000
## Mean : 2.049 Mean :29.21 Mean :1.161 Mean : 1.974
## 3rd Qu.: 2.000 3rd Qu.:33.00 3rd Qu.:1.400 3rd Qu.: 2.000
## Max. :15.000 Max. :54.20 Max. :9.400 Max. :24.000
## NA's :805 NA's :17 NA's :14 NA's :59
## RBC WBC Platelets VOC.year
## Min. :0.84 Min. : 0.400 Min. : 22.0 Min. : 0.000
## 1st Qu.:2.29 1st Qu.: 8.607 1st Qu.: 310.0 1st Qu.: 1.000
## Median :2.70 Median :11.200 Median : 401.0 Median : 2.000
## Mean :2.75 Mean :11.819 Mean : 415.4 Mean : 3.189
## 3rd Qu.:3.12 3rd Qu.:14.200 3rd Qu.: 498.0 3rd Qu.: 4.000
## Max. :7.70 Max. :49.800 Max. :1185.0 Max. :48.000
## NA's :15 NA's :14 NA's :13 NA's :29
## MCV.fl. Lymp
## Min. : 59.0 Min. : 0.100
## 1st Qu.: 81.0 1st Qu.: 3.570
## Median : 87.0 Median : 4.700
## Mean : 87.1 Mean : 5.186
## 3rd Qu.: 94.0 3rd Qu.: 6.210
## Max. :132.0 Max. :22.600
## NA's :14 NA's :13
Categorigal variables
cm %>%
select(SEX) %>%
table() %>%
as.data.frame() %>%
na.omit() %>%
mutate(Prop = (Freq/sum(Freq))*100) -> sex # Sex
sex
## . Freq Prop
## 1 F 657 52.56
## 2 M 593 47.44
colnames(sex) <- c("cat", "freq", "prop")
par(mar=c(4,4,3,4))
barplot(
sex$prop,
ylim=c(0, 60),
col=c(2,4)
)
text(
x=c(0.7,1.9),
y=30,
labels=c("Female", "Male")
)
mtext(
text = "Proportion (%)",
side = 2,
line = 3
)
mtext(
text = "Sex",
side = 1,
line = 1
)
Figure 1. Proportion of females and males in the Cameroonian discovery dataset
cm %>% select(Stroke) %>% table() # Number of patients with at least one overt stroke
## .
## No Yes
## 1195 46
cm %>% select(athal) %>% table() # 3.7 kb HbA1/HbA2 alpha thalassaemia deletion
## .
## α3.7/α3.7 αα/α3.7 αα/αα
## 18 101 286
cm %>% select(Haplotypes) %>% table() # HBB gene cluster haplotypes
## .
## Atypical BEN BEN/AI BEN/Atypical BEN/CAM BEN/CAR
## 3 331 2 26 99 2
## CAM CAM/AI CAM/Atypical CAM/SEN CAR CAR/Atypical
## 174 1 8 1 1 1
## CAR/CAM SEN
## 1 4
CAM = CAM/CAM, BEN = BEN/BEN, SEN = SEN/SEN, CAR = CAR/CAR, Atypical = Atypical/Atypical
tz %>% names()
## [1] "FID" "IID" "AGE" "HbF" "SEX"
Quantitative/Continuous variables
variables <- c(
"AGE",
"HbF"
)
tz %>% select(variables) %>% summary()
## AGE HbF
## Min. : 5.021 Min. : 0.200
## 1st Qu.: 7.876 1st Qu.: 2.600
## Median :11.069 Median : 4.800
## Mean :13.288 Mean : 5.748
## 3rd Qu.:16.298 3rd Qu.: 7.800
## Max. :44.310 Max. :32.500
Categorical variable
tz %>%
mutate(sex.updated = ifelse(SEX == 1, "M", ifelse(SEX == 2, "F", NA))) %>%
select(sex.updated) %>%
table() %>%
as.data.frame() %>%
na.omit() %>%
mutate(Prop = (Freq/sum(Freq))*100) -> sex # Sex
sex
## . Freq Prop
## 1 F 539 52.895
## 2 M 480 47.105
colnames(sex) <- c("cat", "freq", "prop")
par(mar=c(4,4,3,4))
barplot(
sex$prop,
ylim=c(0, 60),
col=c(2,4)
)
text(
x=c(0.7,1.9),
y=30,
labels=c("Female", "Male")
)
mtext(
text = "Proportion (%)",
side = 2,
line = 3
)
mtext(
text = "Sex",
side = 1,
line = 1
)
Figure 2. Proportion of females and males in the Tanzanian replication dataset
cm %>%
mutate(normalizedHbF = HbF^(1/3)) -> cm.hbf.norm
write.table(
cm.hbf.norm,
"../clinicaldata/cm_clinical_data_updated.tsv",
col.names = T,
row.names = F,
quote = F,
sep = "\t"
)
cm.hbf.norm %>%
select(HbF, normalizedHbF) %>%
head()
## # A tibble: 6 × 2
## HbF normalizedHbF
## <dbl> <dbl>
## 1 31.6 3.16
## 2 6.4 1.86
## 3 12.6 2.33
## 4 8.8 2.06
## 5 17.5 2.60
## 6 17.8 2.61
# Tanzania
tz %>%
mutate(normalizedHbF = HbF^(1/3)) -> tz.hbf.norm
write.table(
tz.hbf.norm,
"../clinicaldata/tz_clinical_data_updated.tsv",
col.names = T,
row.names = F,
quote = F,
sep = "\t"
)
tz.hbf.norm %>%
select(HbF, normalizedHbF) %>%
head()
## # A tibble: 6 × 2
## HbF normalizedHbF
## <dbl> <dbl>
## 1 0.2 0.585
## 2 0.3 0.669
## 3 0.3 0.669
## 4 0.3 0.669
## 5 0.3 0.669
## 6 0.4 0.737
cm.hbf.den <- density(cm.hbf.norm$HbF)
cm.nhbf.den <- density(cm.hbf.norm$normalizedHbF)
tz.hbf.den <- density(tz.hbf.norm$HbF)
tz.nhbf.den <- density(tz.hbf.norm$normalizedHbF)
#par(mfrow=c(2,2))
par(mar=c(4,4,3,4))
plot(cm.hbf.den, main="Cameroon HbF")
polygon(cm.hbf.den, col=2)
plot(cm.nhbf.den, ylab="", main="Cameroon normalized HbF")
polygon(cm.nhbf.den, col=5)
plot(tz.hbf.den, main="Tanzania HbF")
polygon(tz.hbf.den, col=2)
plot(tz.nhbf.den, ylab="", main="Tanzania normalized HbF")
polygon(tz.nhbf.den, col=5)
Figure 3. HbF distribution in Cameroonian and Tanzanian study participants before and after normalization