forked from tquaife/julesML
-
Notifications
You must be signed in to change notification settings - Fork 0
/
julesML.py
77 lines (62 loc) · 2.28 KB
/
julesML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from datetime import datetime
import sys
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.ensemble import ExtraTreesRegressor as etr
from sklearn.neural_network import MLPRegressor as nnr
from sklearn import svm
from julesDataHandler import *
if __name__=="__main__":
#load data
#target=julesData("data/gh_point_smc_avail_top.txt")
target=julesData("data/gh_point_smcl_lvl1.txt")
#target=julesData("data/gh_point_smcl_lvl2.txt")
feature1=julesData("data/gh_point_t1p5m_gb.txt")
feature1.transform_data_to_day_of_year()
feature2=julesData("data/gh_point_t1p5m_gb.txt")
feature2.transform_data_rc_running_mean()
feature3=julesData("data/gh_point_precip.txt")
feature3.transform_data_rc_running_mean()
feature4=julesData("data/gh_point_precip.txt")
feature4.lags=[0,1,2,3,4,5]
feat_list=[feature1,feature2,feature3,feature4]
#generate training and validation data
#using np choice here and splitting the sample
#in half to ensure independant testing/traning
n=len(target.data[:])
sample_tmp=np.arange(365,n-365)
sample=np.random.choice(sample_tmp,6000)
s_train=sampleBuilder(sample[:3000],target,feat_list)
s_train.scale()
#validation:
s_evalt=sampleBuilder(sample[3000:],target,feat_list)
s_evalt.scale()
#final year hindcast:
sample=np.arange(n-365,n)
s_hcast=sampleBuilder(sample,target,feat_list)
s_hcast.scale()
#SVM
#n.b. data needs scaling.
m=svm.NuSVR(kernel="rbf", C=1.0)
#Use the SVM to generate a first order estimate of SM
m.fit(s_train.X,s_train.Y)
pred_evalt=m.predict(s_evalt.X)
pred_train=m.predict(s_train.X)
pred_hcast=m.predict(s_hcast.X)
do_scatter_plot=False
#do_scatter_plot=True
if do_scatter_plot:
plt.plot(s_evalt.Y,pred_evalt,".")
plt.plot(s_train.Y,pred_train,".")
else:
#rescale the data
a=s_hcast.Y_mean
b=s_hcast.Y_std
plt.plot(s_hcast.Y*b+a,"-",label="JULES")
plt.plot(pred_hcast*b+a,"-",label="ML")
plt.xlabel('Day of year')
plt.ylabel(s_hcast.target.var_name)
plt.legend()
plt.show()
#plt.savefig("julesML_soil_moisture_example.png")