Skip to content

Latest commit

 

History

History
606 lines (472 loc) · 16.1 KB

Quantile_Regression_NonLinear_Regression.md

File metadata and controls

606 lines (472 loc) · 16.1 KB

Quantile Regression and Non Linear Regression In R

Habib Ezatabadi (Stats9)

import data

xx <- read.csv(file = file.choose(), header = T)[, -1]
dim(xx)
## [1] 52  3
names(xx)
## [1] "x"  "y1" "y2"
attach(xx)


fig1: y1 ~ x

plot(y1 ~ x, pch = 16, col = "red", cex = 2)



LSE Model

Model1 <- lm(y1 ~ x)
sm1 <- summary(Model1)
sm1
## 
## Call:
## lm(formula = y1 ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2963 -1.2160 -0.7705 -0.1687 19.5215 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.5248     1.2567   1.213    0.231    
## x             2.2725     0.5166   4.399 5.69e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.981 on 50 degrees of freedom
## Multiple R-squared:  0.2791, Adjusted R-squared:  0.2646 
## F-statistic: 19.35 on 1 and 50 DF,  p-value: 5.694e-05


Model LTS

library(robustbase)
Model2 <- ltsReg(y1 ~ x, alpha = 0.9)
sm2 <- summary(Model2)
sm2
## 
## Call:
## ltsReg.formula(formula = y1 ~ x, alpha = 0.9)
## 
## Residuals (from reweighted LS):
##       Min        1Q    Median        3Q       Max 
## -0.818464 -0.444420 -0.006101  0.407019  1.104439 
## 
## Coefficients:
##           Estimate Std. Error t value Pr(>|t|)    
## Intercept  1.61984    0.17340   9.341 2.24e-12 ***
## x          1.87300    0.07181  26.082  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5451 on 48 degrees of freedom
## Multiple R-Squared: 0.9341,  Adjusted R-squared: 0.9327 
## F-statistic: 680.3 on 1 and 48 DF,  p-value: < 2.2e-16


LAD (q = 0.1) Model

library(quantreg)
## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve
Model3 <- rq(y1 ~ x, tau = 0.1)
sm3 <- summary(Model3)
sm3
## 
## Call: rq(formula = y1 ~ x, tau = 0.1)
## 
## tau: [1] 0.1
## 
## Coefficients:
##             coefficients lower bd upper bd
## (Intercept) 1.05209      0.94220  1.23804 
## x           1.82462      1.74576  1.93940


LAD (q = 0.5) Model

Model4 <- rq(y1 ~ x, tau = 0.5)
sm4 <- summary(Model4)
## Warning in rq.fit.br(x, y, tau = tau, ci = TRUE, ...): Solution may be
## nonunique
sm4
## 
## Call: rq(formula = y1 ~ x, tau = 0.5)
## 
## tau: [1] 0.5
## 
## Coefficients:
##             coefficients lower bd upper bd
## (Intercept) 1.61060      1.25415  2.02027 
## x           1.87391      1.68524  2.04754


LAD (q = 0.9) Model

Model5 <- rq(y1 ~ x, tau = 0.9)
sm5 <- summary(Model5)
sm5
## 
## Call: rq(formula = y1 ~ x, tau = 0.9)
## 
## tau: [1] 0.9
## 
## Coefficients:
##             coefficients lower bd upper bd
## (Intercept) 2.10375      1.85103  3.94087 
## x           2.06507      1.70515  2.34398

LMS Model

require(MASS)
## Loading required package: MASS
Model6 <- lmsreg(y1 ~ x)
coef_Model6 <- Model6$coefficients
coef_Model6
## (Intercept)           x 
##    1.353550    1.818419


gain Results

ymean <- mean(y1)
R2Model1 <- round(sm1$r.squared, 4)
SST <- sum((y1 - ymean) ^ 2) 
yhat_LSE <- Model1$fitted.values
yhat_LTS <- Model2$fitted.values
yhat_LMS <- Model6$fitted.values
yhat_LAD_q1 <- Model3$fitted.values
yhat_LAD_q5 <- Model4$fitted.values
yhat_LAD_q9 <- Model5$fitted.values

resid_LSE <- Model1$residuals
resid_LTS <- Model2$residuals
resid_LMS <- Model6$residuals
resid_LAD_q1 <- Model3$residuals
resid_LAD_q5 <- Model4$residuals
resid_LAD_q9 <- Model5$residuals


ssR_LSE <- sum((yhat_LSE - ymean) ^ 2)
ssR_LMS <- sum((yhat_LMS - ymean) ^ 2)
ssR_LAD_q1 <- sum((yhat_LAD_q1 - ymean) ^ 2)
ssR_LAD_q5 <- sum((yhat_LAD_q5 - ymean) ^ 2)
ssR_LAD_q9 <- sum((yhat_LAD_q9 - ymean) ^ 2)

R2_LSE <- round(ssR_LSE / SST, 4)
R2_LTS <- round(sm2$r.squared, 4)
R2_LMS <- round(ssR_LMS / SST, 4)
R2_LAD_q1 <- round(ssR_LAD_q1 / SST, 4)
R2_LAD_q5 <- round(ssR_LAD_q5 / SST, 4)
R2_LAD_q9 <- round(ssR_LAD_q9 / SST, 4)


## MSE --

MSE_LSE <- round(sm1$sigma ^ 2, 3)
MSE_LAD_q1 <- round(mean((yhat_LAD_q1 - y1) ^ 2), 3)
MSE_LAD_q5 <- round(mean((yhat_LAD_q5 - y1) ^ 2), 3)
MSE_LAD_q9 <- round(mean((yhat_LAD_q5 - y1) ^ 2), 3)
MSE_LTS <- round(mean((yhat_LTS - y1) ^ 2), 3)
MSE_LMS <- round(mean((yhat_LMS - y1) ^ 2), 3)

# MAE --

MAE_LTS <- round(mean(abs(resid_LTS)), 3)
MAE_LSE <- round(mean(abs(resid_LSE)), 3)
MAE_LMS <- round(mean(abs(resid_LMS)), 3)
MAE_LAD_q1 <- round(mean(abs(resid_LAD_q1)), 3)
MAE_LAD_q5 <- round(mean(abs(resid_LAD_q5)), 3)
MAE_LAD_q9 <- round(mean(abs(resid_LAD_q9)), 3)


# coef --

b0_LSE <- sm1$coefficients[1]
b0_LAD_q1 <- sm3$coefficients[1, 1]
b0_LAD_q9 <- sm5$coefficients[1, 1]
b0_LAD_q5 <- sm4$coefficients[1, 1]
b0_LMS <- coef_Model6[1]
b0_LTS <- sm2$coefficients[1]

## beta1

b1_LSE <- sm1$coefficients[2]
b1_LAD_q1 <- sm3$coefficients[2, 1]
b1_LAD_q9 <- sm5$coefficients[2, 1]
b1_LAD_q5 <- sm4$coefficients[2, 1]
b1_LMS <- coef_Model6[2]
b1_LTS <- sm2$coefficients[2]

# gain results
temp1 <- paste0("LAD_q", c(1, 5, 9))
Final_Results <- data.frame(
    Model = c(temp1, "LMS", "LSE", "LTS"),
    InTerCept = c(b0_LAD_q1, b0_LAD_q5, b0_LAD_q9, b0_LMS,
                    b0_LSE, b0_LTS),  
    Beta1 = c(b1_LAD_q1, b1_LAD_q5, b1_LAD_q9, 
            b1_LMS, b1_LSE, b1_LTS), 
    MSE = c(MSE_LAD_q1, MSE_LAD_q5, MSE_LAD_q9, MSE_LMS, 
            MSE_LSE, MSE_LTS),
    MAE = c(MAE_LAD_q1, MAE_LAD_q5, MAE_LAD_q9, 
            MAE_LMS, MAE_LSE, MAE_LTS),
    R2 = c(R2_LAD_q1, R2_LAD_q5, R2_LAD_q9, 
            R2_LMS, R2_LSE, R2_LTS)
)

# show Concludes
knitr :: kable(Final_Results, caption = "Final Results for First Season", 
            align = "c")
Model InTerCept Beta1 MSE MAE R2
LAD_q1 1.052089 1.824620 17.576 1.463 0.2796
LAD_q5 1.610603 1.873909 16.038 1.212 0.2189
LAD_q9 2.103748 2.065066 16.038 1.615 0.2312
LMS 1.353550 1.818419 16.829 1.296 0.2427
LSE 1.524768 2.272496 15.849 1.560 0.2791
LTS 1.619836 1.873000 16.027 1.213 0.9341

Final Results for First Season



show Figure for 6 Models

plot(y1 ~ x, cex = 2,
col = adjustcolor("black", .4), pch = 15, main = "Robust Models")

abline(a = Final_Results[['InTerCept']][1], 
b = Final_Results[['Beta1']][1], 
col = 1, lty = 6, lwd = 3)

## Model m2
abline(a = Final_Results[['InTerCept']][2], 
b = Final_Results[['Beta1']][2], 
col = adjustcolor("red", 0.5), lty = 5, lwd = 3)
## Model m3
abline(a = Final_Results[['InTerCept']][3], 
b = Final_Results[['Beta1']][3], 
col = 3, lty = 4, lwd = 3)
## LAD Model: q = 0.5
abline(a = Final_Results[['InTerCept']][4], 
b = Final_Results[['Beta1']][4], 
col = 4, lty = 3, lwd = 3)

## LAD Model: q = 0.9
abline(a = Final_Results[['InTerCept']][5], 
b = Final_Results[['Beta1']][5], 
col = "darkgreen", lty = 1, lwd = 3)

## LAD Model: q = 0.1
abline(a = Final_Results[['InTerCept']][6], 
b = Final_Results[['Beta1']][6], 
col = adjustcolor("blue", alpha = 0.5), lty = 9, lwd = 5)
## add legend 
legend("topleft", legend = c("LAD: q = 0.1", "LAD: q = 0.5", 
"LAD: q = 0.9", "LMS", "LSE", "LTS"), 
col = c(1, "red", 3, 4, "darkgreen", "blue"), lty= c(6:3, 1, 9), 
lwd = rep(c(3, 5), c(5, 1)), bty = "n", cex = 2.5)





Season II

Plot2

plot(y2 ~ x, cex = 2, col = "orange", 
pch = 15)



Model I

Mod_1 <- lm(y2 ~ x)
summary(Mod_1)
## 
## Call:
## lm(formula = y2 ~ x)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.027695 -0.021968 -0.009112  0.014201  0.096077 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.128980   0.009393   13.73  < 2e-16 ***
## x           -0.043425   0.003861  -11.25 2.66e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02976 on 50 degrees of freedom
## Multiple R-squared:  0.7167, Adjusted R-squared:  0.7111 
## F-statistic: 126.5 on 1 and 50 DF,  p-value: 2.658e-15


Box-Cox

bx <- boxcox(y2 ~ x)



Transform

lam <- bx$x[which.max(bx$y)]
y2_tr <- (y2 ** lam - 1) / lam
opar <- par(no.readonly = TRUE)
par(mfrow = c(1, 2))

plot(y2_tr ~ x, col = "darkblue", pch = 15, 
main = "Box-Cox Transform scatter data")
plot(y2 ~ x, cex = 2, col = "orange", 
pch = 15, main = "Original Data")



Ols Box cox model

Mod_2 <- lm(y2_tr ~ x)
summary(Mod_2)
## 
## Call:
## lm(formula = y2_tr ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.95839 -0.13080  0.01516  0.16917  0.63553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.08209    0.07190  -15.05   <2e-16 ***
## x           -1.00337    0.02956  -33.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2278 on 50 degrees of freedom
## Multiple R-squared:  0.9584, Adjusted R-squared:  0.9576 
## F-statistic:  1153 on 1 and 50 DF,  p-value: < 2.2e-16


Model III

ff <- function(x, b2, b3) b2 * exp(-b3 * x)
b2 <- y2[which.min(abs(x))] 
b3 <- -log((y2[3] ) / b2) / x[3]
Mod_3 <- nls(y2 ~ ff(x, b2, b3),
start = list( b2 = b2, b3 = b3))
s_mod3 <- summary(Mod_3)
s_mod3
## 
## Formula: y2 ~ ff(x, b2, b3)
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## b2 0.155502   0.006858   22.68   <2e-16 ***
## b3 0.806670   0.041899   19.25   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01953 on 50 degrees of freedom
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 3.527e-06


Model IV

b0 <- max(y2)
Mod_4 <- nls(y2 ~ SSlogis(x, beta1, beta2, beta3),
start = list(beta1 = b0, 
beta2 = b0, beta3 = -b0))
s_mod4 <- summary(Mod_4)
s_mod4
## 
## Formula: y2 ~ SSlogis(x, beta1, beta2, beta3)
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)    
## beta1  0.269306   0.004318   62.38   <2e-16 ***
## beta2  0.634677   0.022272   28.50   <2e-16 ***
## beta3 -0.511812   0.010632  -48.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003381 on 49 degrees of freedom
## 
## Number of iterations to convergence: 7 
## Achieved convergence tolerance: 5.714e-06


s_mod1 <- summary(Mod_1)
s_mod2 <- summary(Mod_2)
ymean <- mean(y2)
yhat_ols <- fitted(Mod_1)
yhat_box_cox <- fitted(Mod_2)
yhat_mod_3 <- fitted(Mod_3)
yhat_mod_4 <- fitted(Mod_4)
sst <- sum((y2 - ymean) ^ 2)
ssr_3 <- sum((yhat_mod_3 - ymean) ^ 2)
ssr_4 <- sum((yhat_mod_4 - ymean) ^ 2)
R2_mod1 <- s_mod1$r.squared
R2_mod2 <- s_mod2$r.squared
R2_mod3 <- ssr_3 / sst
R2_mod4 <- ssr_4 / sst

mse_mod1 <- mean((yhat_ols - y2) ^ 2)
mse_mod2 <- mean((yhat_box_cox - y2) ^ 2)
mse_mod3 <- mean((yhat_mod_3 - y2) ^ 2)
mse_mod4 <- mean((yhat_mod_4 - y2) ^ 2)

Temp<- data.frame(
    Model = c("bx-olx", "ols", "exponential", "logistic"), 
    R2 = round(c(R2_mod2, R2_mod1, R2_mod3, R2_mod4), 3), 
    MSE = round(c(mse_mod1, mse_mod2, mse_mod3, mse_mod4), 5)
)
knitr :: kable(Temp, caption = "Table of Results", align = "c")
Model R2 MSE
bx-olx 0.958 0.00085
ols 0.717 12.00357
exponential 0.749 0.00037
logistic 0.968 0.00001

Table of Results



Plot3

par(mfrow = c(1, 1))
plot(x = x, y = y2_tr, pch = 15, col = "darkgreen", cex = 2, 
main = "Fit Regression line after Box-Cox Transform")
abline(a = Mod_1$coefficients[1], b = Mod_1$coefficients[2], 
col = "cyan", lwd = 2)

plot(x, y2, pch = 15, col = "orange", cex = 2.5, 
main = "Non Linear Regression with Linear Regression")

cf1 <- s_mod3$coefficients[, 1]

f <- function(x) ff(x, b2 = cf1[1], b3 = cf1[2])
f2 <- function(x) SSlogis(x, Asym = s_mod4$coefficients[, 1][1], 
xmid = s_mod4$coefficients[, 1][2], scal = s_mod4$coefficients[, 1][3])
curve(f, extendrange(x)[1], extendrange(x)[2], 
col = "darkblue", add = TRUE)
curve(f2, extendrange(x)[1], extendrange(x)[2], col = "brown", 
add = TRUE)
abline(a = Mod_1$coefficients[1], b = Mod_1$coefficients[2], 
col ="tomato")

legend("topright", legend = c("Exponential", "Logistic", "OLS"), 
col = c("darkblue", "brown", "tomato"), lty = 1, bty = "n", 
cex = 2.5)