Skip to content

Commit

Permalink
Merge pull request #3 from JorisJoBo/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
JorisJoBo authored Feb 4, 2018
2 parents 4dec4d1 + 3423266 commit c60fe49
Show file tree
Hide file tree
Showing 25 changed files with 1,400 additions and 5,978 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
node_modules
*.csv
*.zip
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ language: python
python:
- "3.6"
install:
- pip install -r python-requirements.txt
# command to run tests
- pip install pycodestyle
script:
- pycodestyle --show-source --show-pep8 ./
110 changes: 110 additions & 0 deletions DecisionTree_Dependent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from sklearn import tree
import csv
import numpy as np
import graphviz
import matplotlib.pylab as plt

# Load relative_learning_data.csv into a list of rows; the first row is the
# column header. (Original code rebound the file handle `f` to the csv
# reader, shadowing it — use distinct names instead.)
with open('relative_learning_data.csv') as csvfile:
    data = list(csv.reader(csvfile, delimiter=','))

features = data[0]
classes = []
data = data[1:]
# Indices of columns that aren't useful for decision trees.
removed_features = [0, 1, 2, 3, 4, 5]

# Drop those columns from both the feature names and every data row.
kept = [i for i in range(len(features)) if i not in removed_features]
features = [features[i] for i in kept]
data = [[line[i] for i in kept] for line in data]


def decisiontree(data):
    """Train a decision tree on a random 90/10 split of *data*.

    Side effects: shuffles *data* in place and appends any newly seen
    species (last column) to the module-level ``classes`` list.

    Returns:
        (clf, Xt, Yt, Xv, Yv) — the fitted classifier plus the feature/label
        lists for the training and validation split.
    """
    while True:
        # 90% of the rows become training data, the rest validation.
        np.random.shuffle(data)
        split = int(0.9 * len(data))
        training = data[:split]
        validation = data[split:]

        # Record every species that occurs in the training set.
        for line in training:
            if line[-1] not in classes:
                classes.append(line[-1])

        # A validation species absent from ``classes`` makes the split
        # unusable — retry with a fresh shuffle. (The original recursed
        # here, which could exhaust the recursion limit on unlucky data.)
        if all(line[-1] in classes for line in validation):
            break

    Xt = [line[0:-1] for line in training]
    Yt = [line[-1] for line in training]
    Xv = [line[0:-1] for line in validation]
    Yv = [line[-1] for line in validation]

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(Xt, Yt)
    return clf, Xt, Yt, Xv, Yv


clf, Xt, Yt, Xv, Yv = decisiontree(data)

# Alphabetical order makes the collected species list usable as class_names.
classes.sort()

# Render the fitted tree with graphviz and export it as a PDF.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    class_names=classes,
    feature_names=features[:-1],
    rounded=True,
    special_characters=True)
graphviz.Source(dot_data).render('tree', view=True)


# This calculates the average correctness for the dataset.
def avgcost(data, n):
    """Return the mean validation accuracy over *n* freshly trained trees."""
    # Each decisiontree() call reshuffles the data, so the n scores come
    # from n independent random 90/10 splits.
    total = 0
    for _ in range(n):
        clf, _, _, Xv, Yv = decisiontree(data)
        total += clf.score(Xv, Yv)
    return total / n


print('Average Correctness: ' + str(avgcost(data, 500)))


# This calculates the usage (/importance) for all features in the decisiontree.
def avgimportance(data, n, features):
    """Print the mean feature importance over *n* freshly trained trees.

    Requires n >= 1. Prints one ``feature: value`` line per feature.
    """
    totalimportance = None
    for _ in range(n):
        clf, _, _, _, _ = decisiontree(data)
        importance = clf.feature_importances_
        if totalimportance is None:
            totalimportance = list(importance)
        else:
            # Element-wise running sum of the importance vectors.
            totalimportance = [
                t + i for t, i in zip(totalimportance, importance)]
    # The original indexed range(len(importance)) here, relying on the loop
    # variable leaking out of the loop; iterate the accumulator instead.
    for i in range(len(totalimportance)):
        print(str(features[i]) + ': ' + str(totalimportance[i] / n))


avgimportance(data, 500, features)
120 changes: 120 additions & 0 deletions DecisionTree_Independent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from sklearn import tree
import csv
import numpy as np
import graphviz
import matplotlib.pylab as plt

# Load relative_learning_data.csv into a list of rows; the first row is the
# column header. (Original code rebound the file handle `f` to the csv
# reader, shadowing it — use distinct names instead.)
with open('relative_learning_data.csv') as csvfile:
    data = list(csv.reader(csvfile, delimiter=','))

features = data[0]
classes = []
data = data[1:]
# Indices of columns that aren't useful for decision trees.
# Column 1 (polygonID) is kept here so rows can be grouped per polygon.
removed_features = [0, 2, 3, 4, 5]

# Drop those columns from both the feature names and every data row.
kept = [i for i in range(len(features)) if i not in removed_features]
features = [features[i] for i in kept]
data = [[line[i] for i in kept] for line in data]


def decisiontree(data):
    """Train a decision tree using a polygon-level 90/10 split of *data*.

    All rows sharing a polygonID (column 0) land on the same side of the
    split, so validation polygons are never seen during training.
    Side effect: appends any newly seen species (last column) to the
    module-level ``classes`` list.

    Returns:
        (clf, Xt, Yt, Xv, Yv) — the fitted classifier plus the feature/label
        lists for the training and validation split.
    """
    # Distinct polygonIDs in first-seen order (dict.fromkeys preserves
    # order; the original's `not in list` check was O(n^2)).
    polygonIDs = list(dict.fromkeys(line[0] for line in data))
    np.random.shuffle(polygonIDs)
    trainingsize = 0.9 * len(polygonIDs)
    # A set gives O(1) membership tests in the routing loop below.
    trainingIDs = set(polygonIDs[:int(trainingsize)])

    # Route each row to training or validation by its polygonID, and record
    # every species present in the data.
    training = []
    validation = []
    for line in data:
        if line[-1] not in classes:
            classes.append(line[-1])
        if line[0] in trainingIDs:
            training.append(line)
        else:
            validation.append(line)

    # Strip the polygonID (column 0) and the species label (last column).
    Xt = [line[1:-1] for line in training]
    Yt = [line[-1] for line in training]
    Xv = [line[1:-1] for line in validation]
    Yv = [line[-1] for line in validation]

    # NOTE(review): min_impurity_split is deprecated and removed in recent
    # scikit-learn releases; newer versions need min_impurity_decrease.
    clf = tree.DecisionTreeClassifier(min_impurity_split=0.77)
    clf = clf.fit(Xt, Yt)
    return clf, Xt, Yt, Xv, Yv


clf, Xt, Yt, Xv, Yv = decisiontree(data)

# Alphabetical order makes the collected species list usable as class_names.
classes.sort()

# Render the fitted tree with graphviz and export it as a PDF.
# features[1:-1] skips the polygonID column and the species label column.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    class_names=classes,
    feature_names=features[1:-1],
    rounded=True,
    special_characters=True)
graphviz.Source(dot_data).render('tree', view=True)


# This calculates the average correctness for the dataset.
def avgcost(data, n):
    """Return the mean validation accuracy over *n* freshly trained trees."""
    # Each decisiontree() call redraws the polygon split, so the n scores
    # come from n independent random 90/10 splits.
    total = 0
    for _ in range(n):
        clf, _, _, Xv, Yv = decisiontree(data)
        total += clf.score(Xv, Yv)
    return total / n


print('Average Correctness: ' + str(avgcost(data, 500)))


# This calculates the usage (/importance) for all features in the decisiontree.
def avgimportance(data, n, features):
    """Print the mean feature importance over *n* freshly trained trees.

    Requires n >= 1. Prints one ``feature: value`` line per feature.
    """
    totalimportance = None
    for _ in range(n):
        clf, _, _, _, _ = decisiontree(data)
        importance = clf.feature_importances_
        if totalimportance is None:
            totalimportance = list(importance)
        else:
            # Element-wise running sum of the importance vectors.
            totalimportance = [
                t + i for t, i in zip(totalimportance, importance)]
    # The original indexed range(len(importance)) here, relying on the loop
    # variable leaking out of the loop; iterate the accumulator instead.
    for i in range(len(totalimportance)):
        print(str(features[i]) + ': ' + str(totalimportance[i] / n))


avgimportance(data, 500, features)
11 changes: 11 additions & 0 deletions LAS_scripts/LAScanopyclassify.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@echo off
rem LAScanopyclassify.bat
rem Runs LAStools' lascanopy over every .laz tile in %outputfolder%\classify,
rem writing one csv of canopy metrics per tile into %outputfolder%\canopy.
rem NOTE(review): the -lor/-p/-c/-int_* flags are passed straight to
rem lascanopy; see the LAStools lascanopy README for their exact meaning.
set outputfolder=LiDAR_data

echo Running LAScanopy... (this may take a while)
rem Create the output folder before the loop writes into it.
mkdir %outputfolder%\canopy
for %%a in (%outputfolder%\classify\*.laz) do (
echo - Running canopy on %%~nxa...
lascanopy -i LiDAR_data\classify\%%~nxa -names -lor LiDAR_data\ID_forest_grid_coords.csv -dns -p 5 10 25 50 75 90 -min -max -avg -std -ske -kur -qav -cov -c 2 4 10 50 -int_min -int_max -int_avg -int_qav -int_std -int_ske -int_kur -int_c 128 256 1024 -int_p 25 50 75 -o LiDAR_data\canopy\%%~na.csv
)

PAUSE
29 changes: 8 additions & 21 deletions LASconversion.bat → LAS_scripts/LASconversion.bat
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
@echo off
set filelist=26gz1 26gn1 32fz2 33az1 33az2 32hn2 33cn1 33cn2 32hz2 33cz1 33cz2
set outputfolder=LiDAR_data
set filelist=26gn1 32fz2 33az1 33az2 32hn2 33cn1 33cn2 32hz2 33cz1 33cz2 28cn1 28cn2 28cz2
set outputfolder=LiDAR_data2

if not exist %outputfolder% (
echo LiDAR_data folder doesn't exist yet. Creating...
mkdir %outputfolder%
)

echo Stage 1: running LASmerge... (this may take a while)
for %%a in (%filelist%) do (
for %%a in (%filelist%) do (
if exist g%%a.laz (
if exist u%%a.laz (
echo - Merging %%a...
Expand All @@ -18,44 +18,31 @@ for %%a in (%filelist%) do (
)

echo Stage 2: running LASindex... (this may take a while)
for %%a in (%outputfolder%\*.laz) do (
for %%a in (%outputfolder%\*.laz) do (
echo - Indexing %%a...
lasindex -i %%a
)

echo Stage 3: running Tiling (this may take a while)
mkdir %outputfolder%\tiling
for %%a in (%outputfolder%\*.laz) do (
for %%a in (%outputfolder%\*.laz) do (
echo - Running on %%~na...
lastile -i %%a -o %outputfolder%\tiling\%%~na
)

echo Stage 4: running LASground_new (this may take a while)
mkdir %outputfolder%\ground
for %%a in (%outputfolder%\tiling\*.las) do (
for %%a in (%outputfolder%\tiling\*.las) do (
echo - Running on %%~nxa...
lasground_new -i %outputfolder%\tiling\%%~nxa -o %outputfolder%\ground\%%~na.laz -ignore_class 7
)

echo Stage 5: running LASheight (this may take a while)
mkdir %outputfolder%\height
for %%a in (%outputfolder%\ground\*.laz) do (
for %%a in (%outputfolder%\ground\*.laz) do (
echo - Running on %%~nxa...
lasheight -i %outputfolder%\ground\%%~nxa -o %outputfolder%\height\%%~na.laz
)

echo Stage 6: running LASclassify (this may take a while)
mkdir %outputfolder%\classify
for %%a in (%outputfolder%\height\*.laz) do (
echo - Running on %%~nxa...
lasclassify -i %outputfolder%\height\%%~nxa -o %outputfolder%\classify\%%~na.laz
)

echo Stage 7: running LASindex... (this may take a while)
for %%a in (%outputfolder%\classify\*.laz) do (
echo - Indexing %%~nxa...
lasindex -i %outputfolder%\classify\%%~nxa
)

echo Conversion finished
PAUSE
PAUSE
47 changes: 47 additions & 0 deletions Obsolete_code/add_canopy_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'''
add_canopy_files.py
usage for map: add_canopy_files.py input_directory output.csv
usage for single file: add_canopy_files.py input.csv output.csv
Adds together all csv files generated from lascanopy and removes lines
with missing values. Can also be used to remove lines with missing values
from a single csv file.
The map containing the csv files that have to be combined has to be
present in the same directory as this file.
'''
import os
from sys import argv

dirname = argv[1]
try:
    outputfile = argv[2]
except IndexError:
    # Narrowed from `except BaseException`, which also swallowed
    # KeyboardInterrupt/SystemExit; only a missing argv[2] belongs here.
    print("no output file given, output saved to 'combined_canopy.csv'")
    outputfile = "combined_canopy.csv"

# Collected data lines and the (shared) header line of the csv files.
lines = []
header = None


def _collect(path):
    # Append the data lines of *path* to ``lines``; the first line of each
    # file is taken as the header (files are assumed to share one header).
    global header
    with open(path, 'r') as f:
        for i, line in enumerate(f, 0):
            if i != 0:
                lines.append(line)
            else:
                header = line


if dirname.endswith('.csv'):
    # Single-file mode: just clean one csv.
    _collect(dirname)
else:
    # Directory mode: combine every csv in the directory.
    for file in os.listdir(dirname):
        if file.endswith(".csv"):
            _collect(dirname + '/' + file)

# Write the header plus every row without a missing value ("-" cell).
with open(outputfile, 'w') as f:
    f.write(header)
    for line in lines:
        line2 = line.split(',')
        if "-" not in line2:
            f.write(line)
23 changes: 23 additions & 0 deletions Obsolete_code/common-tree-filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import csv
from collections import Counter

# Filter learning_data.csv down to the rows whose tree species (last column)
# occurs at least MIN_OCCURRENCES times; write them, with the header, to
# common_learning_data.csv.
MIN_OCCURRENCES = 50

with open("learning_data.csv") as f1:
    with open("common_learning_data.csv", 'w') as f2:
        data = list(csv.reader(f1, delimiter=";"))
        header, rows = data[0], data[1:]

        # One counting pass over the data. (The original recounted the whole
        # dataset for every not-yet-seen class, which was O(n^2).)
        counts = Counter(row[-1] for row in rows)

        f2.write(';'.join(header) + '\n')
        for row in rows:
            if counts[row[-1]] >= MIN_OCCURRENCES:
                f2.write(';'.join(row) + '\n')
Loading

0 comments on commit c60fe49

Please sign in to comment.