rahlk · rahlk · Nov 3, 2016 · Nov 4, 2016 · Nov 5, 2016 · Nov 5, 2016
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
 .DS_Store
 .idea/
 .idea/*
+*.json
diff --git a/requirements.txt b/requirements.txt
@@ -6,4 +6,12 @@ pandas
 matplotlib
 click
 flask
-networkx
+networkx
+texttable
+oct2py
+cvxopt
+# prettytable
+pillow
+pygraphviz
+javabridge
+python-weka-wrapper
diff --git a/src/SEER/submit.sh b/src/SEER/submit.sh
diff --git a/src/CCA/__init__.py → src/defects/CCA/__init__.py b/src/CCA/__init__.py → src/defects/CCA/__init__.py
diff --git a/src/HDP/HDP.py → src/defects/HDP/HDP.py b/src/HDP/HDP.py → src/defects/HDP/HDP.py
diff --git a/src/HDP/__init__.py → src/defects/HDP/__init__.py b/src/HDP/__init__.py → src/defects/HDP/__init__.py
diff --git a/src/HDP/lib/java/commons-math3-3.5.jar → ...efects/HDP/lib/java/commons-math3-3.5.jar b/src/HDP/lib/java/commons-math3-3.5.jar → ...efects/HDP/lib/java/commons-math3-3.5.jar
diff --git a/src/HDP/lib/java/weka.jar → src/defects/HDP/lib/java/weka.jar b/src/HDP/lib/java/weka.jar → src/defects/HDP/lib/java/weka.jar
diff --git a/src/HDP/matching/HSAnalyzer.py → src/defects/HDP/matching/HSAnalyzer.py b/src/HDP/matching/HSAnalyzer.py → src/defects/HDP/matching/HSAnalyzer.py
diff --git a/src/HDP/matching/KSAnalyzer.py → src/defects/HDP/matching/KSAnalyzer.py b/src/HDP/matching/KSAnalyzer.py → src/defects/HDP/matching/KSAnalyzer.py
diff --git a/src/HDP/matching/PAnalyzer.py → src/defects/HDP/matching/PAnalyzer.py b/src/HDP/matching/PAnalyzer.py → src/defects/HDP/matching/PAnalyzer.py
diff --git a/src/HDP/matching/SCoAnalyzer.py → src/defects/HDP/matching/SCoAnalyzer.py b/src/HDP/matching/SCoAnalyzer.py → src/defects/HDP/matching/SCoAnalyzer.py
diff --git a/src/HDP/matching/__init__.py → src/defects/HDP/matching/__init__.py b/src/HDP/matching/__init__.py → src/defects/HDP/matching/__init__.py
diff --git a/src/HDP/matching/filter.py → src/defects/HDP/matching/filter.py b/src/HDP/matching/filter.py → src/defects/HDP/matching/filter.py
diff --git a/src/HDP/matching/match_metrics.py → src/defects/HDP/matching/match_metrics.py b/src/HDP/matching/match_metrics.py → src/defects/HDP/matching/match_metrics.py
diff --git a/src/HDP/picklejar/__init__.py → src/defects/HDP/picklejar/__init__.py b/src/HDP/picklejar/__init__.py → src/defects/HDP/picklejar/__init__.py
diff --git a/src/HDP/picklejar/result.pkl → src/defects/HDP/picklejar/result.pkl b/src/HDP/picklejar/result.pkl → src/defects/HDP/picklejar/result.pkl
diff --git a/src/HDP/picklejar/result_dump.pkl → src/defects/HDP/picklejar/result_dump.pkl b/src/HDP/picklejar/result_dump.pkl → src/defects/HDP/picklejar/result_dump.pkl
diff --git a/src/HDP/prediction/__init__.py → src/defects/HDP/prediction/__init__.py b/src/HDP/prediction/__init__.py → src/defects/HDP/prediction/__init__.py
diff --git a/src/HDP/prediction/model.py → src/defects/HDP/prediction/model.py b/src/HDP/prediction/model.py → src/defects/HDP/prediction/model.py
diff --git a/src/HDP/prediction/smote.py → src/defects/HDP/prediction/smote.py b/src/HDP/prediction/smote.py → src/defects/HDP/prediction/smote.py
diff --git a/src/defects/HDP/result.csv b/src/defects/HDP/result.csv
@@ -0,0 +1,23 @@
+cm,0.5,0.06
+camel,0.6,0.06
+Zxing,0.66,0.07
+ant,0.55,0.07
+poi,0.56,0.09
+JDT,0.51,0.07
+ivy,0.62,0.05
+LC,0.55,0.07
+xerces,0.58,0.07
+Safe,0.67,0.09
+Apache,0.52,0.09
+log4j,0.65,0.06
+jm,0.54,0.08
+jedit,0.63,0.09
+PDE,0.54,0.08
+EQ,0.55,0.09
+kc,0.6,0.09
+mc,0.5,0.08
+xalan,0.54,0.07
+ML,0.56,0.08
+lucene,0.6,0.07
+mw,0.53,0.08
+velocity,0.66,0.05
diff --git a/src/defects/HDP/result.xls b/src/defects/HDP/result.xls
diff --git a/src/HDP/selection/__init__.py → src/defects/HDP/selection/__init__.py b/src/HDP/selection/__init__.py → src/defects/HDP/selection/__init__.py
diff --git a/src/HDP/selection/chi2.py → src/defects/HDP/selection/chi2.py b/src/HDP/selection/chi2.py → src/defects/HDP/selection/chi2.py
diff --git a/src/defects/HDP/temp.csv b/src/defects/HDP/temp.csv
diff --git a/src/SEER/__init__.py → src/defects/NAIVE/__init__.py b/src/SEER/__init__.py → src/defects/NAIVE/__init__.py
diff --git a/src/defects/NAIVE/execute.py b/src/defects/NAIVE/execute.py
@@ -0,0 +1,106 @@
+from __future__ import print_function, division
+import os
+import sys
+
+root = os.path.join(os.getcwd().split('src')[0], 'src/defects')
+if root not in sys.path:
+    sys.path.append(root)
+
+import warnings
+from prediction.model import nbayes, rf_model
+from py_weka.classifier import classify
+from utils import *
+from metrics.abcd import abcd
+from metrics.recall_vs_loc import get_curve
+from pdb import set_trace
+import numpy as np
+from scipy.spatial.distance import pdist, squareform
+import pandas
+from plot.effort_plot import effort_plot
+from tabulate import tabulate
+
+def weight_training(test_instance, training_instance):
+    """
+    Min-max scale the training features using the test set's column ranges.
+
+    Every column except the last is treated as a feature; the last column is
+    copied over unchanged.
+
+    :param test_instance: DataFrame supplying the per-column min and max
+    :param training_instance: DataFrame to rescale (it is not modified)
+    :return: new DataFrame holding the rescaled features followed by the
+             original last column of ``training_instance``
+    """
+    head = training_instance.columns
+    features, label = head[:-1], head[-1]
+    lo = test_instance[features].min()
+    # A column that is constant in the test set has a zero range; dividing by
+    # it would fill the column with NaN/inf, so fall back to a unit range.
+    span = (test_instance[features].max() - lo).replace(0, 1)
+    new_train = (training_instance[features] - lo) / span
+    new_train[label] = training_instance[label]
+    return new_train
+
+
+def predict_defects(train, test):
+    """
+    Fit ``rf_model`` on ``train`` and predict the labels of ``test``.
+
+    :param train: training DataFrame, handed to ``rf_model`` as-is
+    :param test: test DataFrame; its last column holds the labels, where the
+                 string "T" is encoded as 1 and anything else as 0
+    :return: tuple ``(actual, predicted, distr)`` -- the 0/1 ground truth
+             list plus the two values returned by ``rf_model``
+    """
+    label_column = test.columns[-1]
+    labels = test[label_column].values.tolist()
+    actual = [int(label == "T") for label in labels]
+    predicted, distr = rf_model(train, test)
+    return actual, predicted, distr
+
+
+def bellw(source, target, n_rep=12):
+    """
+    Score every source project as training data for every target project.
+
+    For each target, every differently named project in ``source`` is used in
+    turn as training data. Both frames are min-max scaled with the target's
+    per-column minimum and maximum (the last column is treated as the class
+    label and left untouched), predictions come from ``predict_defects``, and
+    the Pd / Pf / AUC values reported by ``abcd`` are aggregated over
+    ``n_rep`` repetitions. A recall-vs-LOC plot is written for each pair and
+    one ranking table is printed per target.
+
+    :param source: mapping of project name -> object whose ``.data`` is
+                   accepted by ``list2dataframe`` (training candidates)
+    :param target: mapping of the same shape (projects to predict on)
+    :param n_rep: number of repeats per (source, target) pair
+    :return: dict mapping target name -> pandas.DataFrame of per-source
+             statistics, sorted by mean AUC in descending order
+    """
+    columns = ["Name", "Pd (Mean)", "Pd (Std)",
+               "Pf (Mean)", "Pf (Std)",
+               "AUC (Mean)", "AUC (Std)"]
+    result = dict()
+    # .items() / range() behave the same on Python 2 and Python 3.
+    for tgt_name, tgt_path in target.items():
+        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
+        stats = []
+        for src_name, src_path in source.items():
+            if src_name == tgt_name:
+                continue  # never train on the project being predicted
+
+            src = list2dataframe(src_path.data)
+            tgt = list2dataframe(tgt_path.data)
+            features, label = tgt.columns[:-1], tgt.columns[-1]
+
+            pd_runs, pf_runs, auc_runs = [], [], []
+            for _ in range(n_rep):
+                _train = weight_training(test_instance=tgt, training_instance=src)
+                lo = tgt[features].min()
+                # A constant column has zero range; use a unit range instead
+                # of dividing by zero and producing NaN/inf features.
+                span = (tgt[features].max() - lo).replace(0, 1)
+                _test = (tgt[features] - lo) / span
+                _test[label] = tgt[label]
+                actual, predicted, distribution = predict_defects(train=_train, test=_test)
+                # Lines of code expressed as a percentage of the largest module.
+                loc = tgt["$loc"].values
+                loc = loc * 100 / np.max(loc)
+                recall, loc, au_roc = get_curve(loc, actual, predicted, distribution)
+                # Same destination on every repeat, so the plot of the last
+                # repeat is the one that remains on disk.
+                effort_plot(recall, loc,
+                            save_dest=os.path.abspath(os.path.join(root, "plot", "plots", tgt_name)),
+                            save_name=src_name)
+                p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)
+
+                pd_runs.append(p_d)
+                pf_runs.append(p_f)
+                auc_runs.append(int(auroc))
+            # NOTE(review): int() truncates toward zero; this presumably
+            # expects abcd to report percentages rather than fractions -- confirm.
+            stats.append([src_name, int(np.mean(pd_runs)), int(np.std(pd_runs)),
+                          int(np.mean(pf_runs)), int(np.std(pf_runs)),
+                          int(np.mean(auc_runs)), int(np.std(auc_runs))])
+
+        # row[-2] is "AUC (Mean)": best-scoring source project first.
+        stats = pandas.DataFrame(sorted(stats, key=lambda row: row[-2], reverse=True),
+                                 columns=columns)
+        print(tabulate(stats,
+                       headers=columns,
+                       showindex="never",
+                       tablefmt="fancy_grid"))
+
+        result[tgt_name] = stats
+    return result
+
+
+def tnb_jur():
+    """
+    Run ``bellw`` on the "Apache" entry of ``get_all_projects()``.
+
+    The same collection is used as both source and target, with 10 repeats.
+
+    :return: whatever ``bellw`` returns for that collection
+    """
+    from data.handler import get_all_projects
+    projects = get_all_projects()
+    community = projects["Apache"]
+    return bellw(source=community, target=community, n_rep=10)
+
+
+if __name__ == "__main__":
+    tnb_jur()
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,3 +2,4 @@ @@
     .DS_Store
     .idea/
     .idea/*
+    *.json