Skip to content

Commit

Permalink
Merge pull request #3 from JorisJoBo/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
JorisJoBo authored Feb 4, 2018
2 parents 4dec4d1 + 3423266 commit c60fe49
Show file tree
Hide file tree
Showing 25 changed files with 1,400 additions and 5,978 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
node_modules
*.csv
*.zip
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ language: python
python:
- "3.6"
install:
- pip install -r python-requirements.txt
# command to run tests
- pip install pycodestyle
script:
- pycodestyle --show-source --show-pep8 ./
110 changes: 110 additions & 0 deletions DecisionTree_Dependent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from sklearn import tree
import csv
import numpy as np
import graphviz
import matplotlib.pylab as plt

# Load relative_learning_data.csv into a list of rows; the first row is the
# column header. (Original code rebound the file handle `f` to the csv
# reader, shadowing it — use distinct names instead.)
with open('relative_learning_data.csv') as csvfile:
    data = list(csv.reader(csvfile, delimiter=','))

features = data[0]
classes = []
data = data[1:]
# Indices of columns that aren't useful for decision trees.
removed_features = [0, 1, 2, 3, 4, 5]

# Drop those columns from both the feature names and every data row.
kept = [i for i in range(len(features)) if i not in removed_features]
features = [features[i] for i in kept]
data = [[line[i] for i in kept] for line in data]


def decisiontree(data):
    """Train a decision tree on a random 90/10 split of *data*.

    Side effects: shuffles *data* in place and appends any newly seen
    species (last column) to the module-level ``classes`` list.

    Returns:
        (clf, Xt, Yt, Xv, Yv) — the fitted classifier plus the feature/label
        lists for the training and validation split.
    """
    while True:
        # 90% of the rows become training data, the rest validation.
        np.random.shuffle(data)
        split = int(0.9 * len(data))
        training = data[:split]
        validation = data[split:]

        # Record every species that occurs in the training set.
        for line in training:
            if line[-1] not in classes:
                classes.append(line[-1])

        # A validation species absent from ``classes`` makes the split
        # unusable — retry with a fresh shuffle. (The original recursed
        # here, which could exhaust the recursion limit on unlucky data.)
        if all(line[-1] in classes for line in validation):
            break

    Xt = [line[0:-1] for line in training]
    Yt = [line[-1] for line in training]
    Xv = [line[0:-1] for line in validation]
    Yv = [line[-1] for line in validation]

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(Xt, Yt)
    return clf, Xt, Yt, Xv, Yv


clf, Xt, Yt, Xv, Yv = decisiontree(data)

# Alphabetical order makes the collected species list usable as class_names.
classes.sort()

# Render the fitted tree with graphviz and export it as a PDF.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    class_names=classes,
    feature_names=features[:-1],
    rounded=True,
    special_characters=True)
graphviz.Source(dot_data).render('tree', view=True)


# This calculates the average correctness for the dataset.
def avgcost(data, n):
    """Return the mean validation accuracy over *n* freshly trained trees."""
    # Each decisiontree() call reshuffles the data, so the n scores come
    # from n independent random 90/10 splits.
    total = 0
    for _ in range(n):
        clf, _, _, Xv, Yv = decisiontree(data)
        total += clf.score(Xv, Yv)
    return total / n


print('Average Correctness: ' + str(avgcost(data, 500)))


# This calculates the usage (/importance) for all features in the decisiontree.
def avgimportance(data, n, features):
    """Print the mean feature importance over *n* freshly trained trees.

    Requires n >= 1. Prints one ``feature: value`` line per feature.
    """
    totalimportance = None
    for _ in range(n):
        clf, _, _, _, _ = decisiontree(data)
        importance = clf.feature_importances_
        if totalimportance is None:
            totalimportance = list(importance)
        else:
            # Element-wise running sum of the importance vectors.
            totalimportance = [
                t + i for t, i in zip(totalimportance, importance)]
    # The original indexed range(len(importance)) here, relying on the loop
    # variable leaking out of the loop; iterate the accumulator instead.
    for i in range(len(totalimportance)):
        print(str(features[i]) + ': ' + str(totalimportance[i] / n))


avgimportance(data, 500, features)
120 changes: 120 additions & 0 deletions DecisionTree_Independent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from sklearn import tree
import csv
import numpy as np
import graphviz
import matplotlib.pylab as plt

# Load relative_learning_data.csv into a list of rows; the first row is the
# column header. (Original code rebound the file handle `f` to the csv
# reader, shadowing it — use distinct names instead.)
with open('relative_learning_data.csv') as csvfile:
    data = list(csv.reader(csvfile, delimiter=','))

features = data[0]
classes = []
data = data[1:]
# Indices of columns that aren't useful for decision trees.
# Column 1 (polygonID) is kept here so rows can be grouped per polygon.
removed_features = [0, 2, 3, 4, 5]

# Drop those columns from both the feature names and every data row.
kept = [i for i in range(len(features)) if i not in removed_features]
features = [features[i] for i in kept]
data = [[line[i] for i in kept] for line in data]


def decisiontree(data):
    """Train a decision tree using a polygon-level 90/10 split of *data*.

    All rows sharing a polygonID (column 0) land on the same side of the
    split, so validation polygons are never seen during training.
    Side effect: appends any newly seen species (last column) to the
    module-level ``classes`` list.

    Returns:
        (clf, Xt, Yt, Xv, Yv) — the fitted classifier plus the feature/label
        lists for the training and validation split.
    """
    # Distinct polygonIDs in first-seen order (dict.fromkeys preserves
    # order; the original's `not in list` check was O(n^2)).
    polygonIDs = list(dict.fromkeys(line[0] for line in data))
    np.random.shuffle(polygonIDs)
    trainingsize = 0.9 * len(polygonIDs)
    # A set gives O(1) membership tests in the routing loop below.
    trainingIDs = set(polygonIDs[:int(trainingsize)])

    # Route each row to training or validation by its polygonID, and record
    # every species present in the data.
    training = []
    validation = []
    for line in data:
        if line[-1] not in classes:
            classes.append(line[-1])
        if line[0] in trainingIDs:
            training.append(line)
        else:
            validation.append(line)

    # Strip the polygonID (column 0) and the species label (last column).
    Xt = [line[1:-1] for line in training]
    Yt = [line[-1] for line in training]
    Xv = [line[1:-1] for line in validation]
    Yv = [line[-1] for line in validation]

    # NOTE(review): min_impurity_split is deprecated and removed in recent
    # scikit-learn releases; newer versions need min_impurity_decrease.
    clf = tree.DecisionTreeClassifier(min_impurity_split=0.77)
    clf = clf.fit(Xt, Yt)
    return clf, Xt, Yt, Xv, Yv


clf, Xt, Yt, Xv, Yv = decisiontree(data)

# Alphabetical order makes the collected species list usable as class_names.
classes.sort()

# Render the fitted tree with graphviz and export it as a PDF.
# features[1:-1] skips the polygonID column and the species label column.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    class_names=classes,
    feature_names=features[1:-1],
    rounded=True,
    special_characters=True)
graphviz.Source(dot_data).render('tree', view=True)


# This calculates the average correctness for the dataset.
def avgcost(data, n):
    """Return the mean validation accuracy over *n* freshly trained trees."""
    # Each decisiontree() call redraws the polygon split, so the n scores
    # come from n independent random 90/10 splits.
    total = 0
    for _ in range(n):
        clf, _, _, Xv, Yv = decisiontree(data)
        total += clf.score(Xv, Yv)
    return total / n


print('Average Correctness: ' + str(avgcost(data, 500)))


# This calculates the usage (/importance) for all features in the decisiontree.
def avgimportance(data, n, features):
    """Print the mean feature importance over *n* freshly trained trees.

    Requires n >= 1. Prints one ``feature: value`` line per feature.
    """
    totalimportance = None
    for _ in range(n):
        clf, _, _, _, _ = decisiontree(data)
        importance = clf.feature_importances_
        if totalimportance is None:
            totalimportance = list(importance)
        else:
            # Element-wise running sum of the importance vectors.
            totalimportance = [
                t + i for t, i in zip(totalimportance, importance)]
    # The original indexed range(len(importance)) here, relying on the loop
    # variable leaking out of the loop; iterate the accumulator instead.
    for i in range(len(totalimportance)):
        print(str(features[i]) + ': ' + str(totalimportance[i] / n))


avgimportance(data, 500, features)
11 changes: 11 additions & 0 deletions LAS_scripts/LAScanopyclassify.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@echo off
rem LAScanopyclassify.bat
rem Runs LAStools' lascanopy over every .laz tile in %outputfolder%\classify,
rem writing one csv of canopy metrics per tile into %outputfolder%\canopy.
rem NOTE(review): the -lor/-p/-c/-int_* flags are passed straight to
rem lascanopy; see the LAStools lascanopy README for their exact meaning.
set outputfolder=LiDAR_data

echo Running LAScanopy... (this may take a while)
rem Create the output folder before the loop writes into it.
mkdir %outputfolder%\canopy
for %%a in (%outputfolder%\classify\*.laz) do (
echo - Running canopy on %%~nxa...
lascanopy -i LiDAR_data\classify\%%~nxa -names -lor LiDAR_data\ID_forest_grid_coords.csv -dns -p 5 10 25 50 75 90 -min -max -avg -std -ske -kur -qav -cov -c 2 4 10 50 -int_min -int_max -int_avg -int_qav -int_std -int_ske -int_kur -int_c 128 256 1024 -int_p 25 50 75 -o LiDAR_data\canopy\%%~na.csv
)

PAUSE
29 changes: 8 additions & 21 deletions LASconversion.bat → LAS_scripts/LASconversion.bat
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
@echo off
set filelist=26gz1 26gn1 32fz2 33az1 33az2 32hn2 33cn1 33cn2 32hz2 33cz1 33cz2
set outputfolder=LiDAR_data
set filelist=26gn1 32fz2 33az1 33az2 32hn2 33cn1 33cn2 32hz2 33cz1 33cz2 28cn1 28cn2 28cz2
set outputfolder=LiDAR_data2

if not exist %outputfolder% (
echo LiDAR_data folder doesn't exist yet. Creating...
mkdir %outputfolder%
)

echo Stage 1: running LASmerge... (this may take a while)
for %%a in (%filelist%) do (
for %%a in (%filelist%) do (
if exist g%%a.laz (
if exist u%%a.laz (
echo - Merging %%a...
Expand All @@ -18,44 +18,31 @@ for %%a in (%filelist%) do (
)

echo Stage 2: running LASindex... (this may take a while)
for %%a in (%outputfolder%\*.laz) do (
for %%a in (%outputfolder%\*.laz) do (
echo - Indexing %%a...
lasindex -i %%a
)

echo Stage 3: running Tiling (this may take a while)
mkdir %outputfolder%\tiling
for %%a in (%outputfolder%\*.laz) do (
for %%a in (%outputfolder%\*.laz) do (
echo - Running on %%~na...
lastile -i %%a -o %outputfolder%\tiling\%%~na
)

echo Stage 4: running LASground_new (this may take a while)
mkdir %outputfolder%\ground
for %%a in (%outputfolder%\tiling\*.las) do (
for %%a in (%outputfolder%\tiling\*.las) do (
echo - Running on %%~nxa...
lasground_new -i %outputfolder%\tiling\%%~nxa -o %outputfolder%\ground\%%~na.laz -ignore_class 7
)

echo Stage 5: running LASheight (this may take a while)
mkdir %outputfolder%\height
for %%a in (%outputfolder%\ground\*.laz) do (
for %%a in (%outputfolder%\ground\*.laz) do (
echo - Running on %%~nxa...
lasheight -i %outputfolder%\ground\%%~nxa -o %outputfolder%\height\%%~na.laz
)

echo Stage 6: running LASclassify (this may take a while)
mkdir %outputfolder%\classify
for %%a in (%outputfolder%\height\*.laz) do (
echo - Running on %%~nxa...
lasclassify -i %outputfolder%\height\%%~nxa -o %outputfolder%\classify\%%~na.laz
)

echo Stage 7: running LASindex... (this may take a while)
for %%a in (%outputfolder%\classify\*.laz) do (
echo - Indexing %%~nxa...
lasindex -i %outputfolder%\classify\%%~nxa
)

echo Conversion finished
PAUSE
PAUSE
47 changes: 47 additions & 0 deletions Obsolete_code/add_canopy_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'''
add_canopy_files.py
usage for map: add_canopy_files.py input_directory output.csv
usage for single file: add_canopy_files.py input.csv output.csv
Adds together all csv files generated from lascanopy and removes lines
with missing values. Can also be used to remove lines with missing values
from a single csv file.
The map containing the csv files that have to be combined has to be
present in the same directory as this file.
'''
import os
from sys import argv

dirname = argv[1]
try:
    outputfile = argv[2]
except IndexError:
    # Narrowed from `except BaseException`, which also swallowed
    # KeyboardInterrupt/SystemExit; only a missing argv[2] belongs here.
    print("no output file given, output saved to 'combined_canopy.csv'")
    outputfile = "combined_canopy.csv"

# Collected data lines and the (shared) header line of the csv files.
lines = []
header = None


def _collect(path):
    # Append the data lines of *path* to ``lines``; the first line of each
    # file is taken as the header (files are assumed to share one header).
    global header
    with open(path, 'r') as f:
        for i, line in enumerate(f, 0):
            if i != 0:
                lines.append(line)
            else:
                header = line


if dirname.endswith('.csv'):
    # Single-file mode: just clean one csv.
    _collect(dirname)
else:
    # Directory mode: combine every csv in the directory.
    for file in os.listdir(dirname):
        if file.endswith(".csv"):
            _collect(dirname + '/' + file)

# Write the header plus every row without a missing value ("-" cell).
with open(outputfile, 'w') as f:
    f.write(header)
    for line in lines:
        line2 = line.split(',')
        if "-" not in line2:
            f.write(line)
23 changes: 23 additions & 0 deletions Obsolete_code/common-tree-filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import csv
from collections import Counter

# Filter learning_data.csv down to the rows whose tree species (last column)
# occurs at least MIN_OCCURRENCES times; write them, with the header, to
# common_learning_data.csv.
MIN_OCCURRENCES = 50

with open("learning_data.csv") as f1:
    with open("common_learning_data.csv", 'w') as f2:
        data = list(csv.reader(f1, delimiter=";"))
        header, rows = data[0], data[1:]

        # One counting pass over the data. (The original recounted the whole
        # dataset for every not-yet-seen class, which was O(n^2).)
        counts = Counter(row[-1] for row in rows)

        f2.write(';'.join(header) + '\n')
        for row in rows:
            if counts[row[-1]] >= MIN_OCCURRENCES:
                f2.write(';'.join(row) + '\n')
Loading

0 comments on commit c60fe49

Please sign in to comment.