
[API-SERVER] New DB inherritance approach #280

Merged: 99 commits, Mar 10, 2024
96f823f
[API-SERVER] New DB inherritance approach
leondavi Jan 16, 2024
959df0a
[DB] Add DB implementation
leondavi Jan 17, 2024
d3fe059
[APIServer] WIP DB Concept
leondavi Jan 17, 2024
acbaf7a
[API Server] Pass tests
leondavi Jan 17, 2024
9624c7e
Comments on formats
GuyPerets106 Jan 23, 2024
9f2398a
[ApiServerDB] WIP decodeHttpMainServer
ohad123 Jan 23, 2024
4f004fc
[ApiServerDB] Changed worker seperators in stats
GuyPerets106 Jan 24, 2024
5c19127
[Api Server] change NerlComDB functions
ohad123 Jan 24, 2024
05854e6
[ApiServerDB] Fixed stats request issues
GuyPerets106 Jan 24, 2024
1a7568b
[ApiServerDB] WIP NerlcomDB - stats from experiment
ohad123 Jan 25, 2024
9a6fcfb
[ApiServerDB] Changes to Routing IMPORTANT
GuyPerets106 Jan 25, 2024
b3ad95b
Merge branch 'ApiServerDB' of github.com:leondavi/NErlNet into ApiSer…
GuyPerets106 Jan 25, 2024
a873c97
[ApiServerDB] Changed Encoded String Format
GuyPerets106 Jan 25, 2024
799ea7b
[ApiServerDB] fix decoderHttpMainServer
ohad123 Jan 25, 2024
eb08392
[ApiServer] add comments to decoder
ohad123 Jan 25, 2024
2ecec82
[ApiServerDB] Doubled client name bug fixed
GuyPerets106 Jan 25, 2024
d6be794
[ApiServerDB] WIP fix NerlComDb
ohad123 Jan 25, 2024
e4d5b30
[ApiServerDB] fix comment - use defintions for separators
ohad123 Jan 26, 2024
c4f6712
[ApiServerDB] Removed prints
GuyPerets106 Jan 30, 2024
0271514
Added Example Notebook
GuyPerets106 Jan 30, 2024
a2b684f
[ApiServerDB] CommStats changes , Tasks in comments
GuyPerets106 Jan 31, 2024
984777c
[ApiServerDB] WIP
NoaShapira8 Feb 2, 2024
18b3e27
[ApiServerDB] Parsing trainRes completed
GuyPerets106 Feb 5, 2024
dd71d09
[ApiServer] WIP rebuild of experiment flow
ohad123 Feb 8, 2024
0903058
Merge branch 'master' of https://github.com/leondavi/NErlNet into Api…
ohad123 Feb 8, 2024
adcec42
[Api Server] add new json for experiment flow
ohad123 Feb 11, 2024
1174e8f
[ApiServerDB] WIP experiment flow
ohad123 Feb 11, 2024
ab220ef
[ApiServer]WIP
NoaShapira8 Feb 16, 2024
5ee34a6
[ApiServer] WIP finish experiment json parser
ohad123 Feb 16, 2024
09b53d0
[ApiServer] WIP fix parse_experiment_flow_json
ohad123 Feb 16, 2024
36b56a0
[ApiServerDB] WIP
ohad123 Feb 16, 2024
2252b78
[ApiServerDB] WIP
leondavi Feb 16, 2024
f3e7553
[ApiServerDB] WIP: run_current_experiment_phase
NoaShapira8 Feb 16, 2024
e14489c
[ApiServerDB] WIP
leondavi Feb 16, 2024
a7a4278
[API_SERVER] Add terminate action and new send jsons action
leondavi Feb 17, 2024
fb898a4
[ApiServerDB] Ack Updates
GuyPerets106 Feb 17, 2024
6571a9f
[ApiServer] WIP
ohad123 Feb 18, 2024
00033fa
[ApiServerDB] WIP
ohad123 Feb 18, 2024
6bce283
[ApiServerDB]WIP
ohad123 Feb 18, 2024
ef0c6b5
[ApiServerDB] WIP
ohad123 Feb 18, 2024
29a063e
[ApiServerDB] JsonReceivedAck
GuyPerets106 Feb 18, 2024
9e0f78b
[ApiServerDB] WIP
ohad123 Feb 18, 2024
f0b8e55
[ApiServerDB] WIP
ohad123 Feb 20, 2024
389da10
[ApiServerDB] WIP
ohad123 Feb 22, 2024
5b4c966
[ApiServerDB] WIP
ohad123 Feb 22, 2024
b68b109
[ApiServerDB] Updated predictRes Erlang Side
GuyPerets106 Feb 22, 2024
1fbb24f
[ApiServerDB] WIP
ohad123 Feb 22, 2024
ad46793
[APiServerDB] WIP
ohad123 Feb 22, 2024
af29fa2
[ApiServerDB] WIP
ohad123 Feb 22, 2024
dcf90c5
[ApiServerDB] WIP
ohad123 Feb 22, 2024
10eec93
[ApiServerDB] Fixed predictRes erlang-side
GuyPerets106 Feb 23, 2024
2ca8950
[ApiServerDB] WIP
ohad123 Feb 24, 2024
567b4d1
[ApiServerDB] WIP
ohad123 Feb 24, 2024
767823e
[ApiServerDB] Added Batch Timestamp
GuyPerets106 Feb 24, 2024
e3daca7
[ApiServerDB] Removed Deprecated "NumOfSamples"
GuyPerets106 Feb 24, 2024
dbc6b11
[ApiServerDB] Added Batch Timestamp to train phase
GuyPerets106 Feb 24, 2024
36d03b7
[ApiServerDB] WIP
ohad123 Feb 24, 2024
428c85c
[ApiServerDB] WIP
ohad123 Feb 24, 2024
2f8a120
[ApiServerDB] Added tensor format to trainRes
GuyPerets106 Feb 24, 2024
bcf6611
Merge branch 'ApiServerDB' of github.com:leondavi/NErlNet into ApiSer…
GuyPerets106 Feb 24, 2024
1d2d545
[ApiServerDB] Removed print
GuyPerets106 Feb 24, 2024
3575408
[ApiServerDB[WIP
ohad123 Feb 24, 2024
6256e0f
[ApiServerDB] WIP
ohad123 Feb 24, 2024
7ea6274
[ApiServerDB] WIP
ohad123 Feb 25, 2024
dc92323
[ApiServerDB] WIP
ohad123 Feb 25, 2024
5f96b99
[ApiServerDB]WIP
ohad123 Feb 25, 2024
81864af
[ApiServerDB] WIP
NoaShapira8 Feb 27, 2024
8eab3ed
[ApiServerDB] Build data frame for loss by ts
NoaShapira8 Feb 28, 2024
0010b96
[ApiServerDB] add func get_mean_loss_list
NoaShapira8 Feb 28, 2024
7dc342e
[ApiServerDB] WIP
NoaShapira8 Feb 29, 2024
652913c
[ApiServerDB] WIP
NoaShapira8 Feb 29, 2024
876e7d9
[ApiServerDB] WIP
ohad123 Feb 29, 2024
1ac898e
[ApiServerDB] WIP
ohad123 Feb 29, 2024
ebca014
[ApiServerDB]WIP
ohad123 Mar 2, 2024
cde6f76
[ApiServerDB] Nerlnet Restart Erlang Side
GuyPerets106 Mar 3, 2024
d415fce
Merge branch 'ApiServerDB' of github.com:leondavi/NErlNet into ApiSer…
GuyPerets106 Mar 3, 2024
0900145
[ApiServerDB] add confusion matrix
ohad123 Mar 4, 2024
a1a1982
Merge branch 'ApiServerDB' of github.com:leondavi/NErlNet into ApiSer…
ohad123 Mar 4, 2024
85477f4
[ApiServerDB] WIP
ohad123 Mar 4, 2024
b7724c1
[ApiServerDB] NerlnetGraph visualization in apiServer
GuyPerets106 Mar 5, 2024
9786692
[ApiServerDB] Nerlnet Graph visualization completed
GuyPerets106 Mar 5, 2024
7b6ecc3
[ApiServerDB] Finish get_confusion_matrices
NoaShapira8 Mar 6, 2024
3d8d32b
[ApiServerDB] finish get_model_performence_stats
NoaShapira8 Mar 6, 2024
a6aa35f
[ApiServerDB] new experiment test
NoaShapira8 Mar 6, 2024
487cf22
Merge branch 'master' of github.com:leondavi/NErlNet into ApiServerDB
leondavi Mar 7, 2024
7c6a4a2
tmp
leondavi Mar 7, 2024
d0d6ac5
[CI] New ApiServer DP interface
leondavi Mar 7, 2024
88bbcee
[CI] Issue with experiment
leondavi Mar 7, 2024
8747dde
[ApiServerDB] fix get_min_loss to be OrderDict
NoaShapira8 Mar 7, 2024
1a268ce
[ApiServerDB] fix assert issue
NoaShapira8 Mar 7, 2024
423a045
[CI] Issue with assert 1K batches train and predict
leondavi Mar 7, 2024
8460d29
[ApiServerDB] Fix assert issue
NoaShapira8 Mar 10, 2024
4d6d78f
[CI] Baseline generate
leondavi Mar 10, 2024
e87eb91
[ApiServerDB] get communication stats
NoaShapira8 Mar 10, 2024
3780f20
[CI] Issue with confusion matrix test
leondavi Mar 10, 2024
7c354ba
Merge branch 'ApiServerDB' of github.com:leondavi/NErlNet into ApiSer…
leondavi Mar 10, 2024
57cfdb7
Add comm stats to test - TODO missed batches
leondavi Mar 10, 2024
8f56b3e
[ApiServerDB] WIP stats
NoaShapira8 Mar 10, 2024
41ece72
[ApiServerDB] flow_test
NoaShapira8 Mar 10, 2024
2 changes: 2 additions & 0 deletions NerlnetJupyterLaunch.sh
Original file line number Diff line number Diff line change
@@ -139,5 +139,7 @@ cd $JUPDIR
 generate_set_jupyter_env
 generate_readme_md

+# TODO add networkx and pygraphviz installations!
+
 jupyter-lab

596 changes: 596 additions & 0 deletions examples/NerlnetExperiment.ipynb

Large diffs are not rendered by default.

@@ -0,0 +1,9 @@
{
"connectionsMap":
{
"r1":["mainServer", "r2"],
"r2":["r3", "s1"],
"r3":["r4", "c1" , "s2"],
"r4":["r1", "c2"]
}
}
124 changes: 124 additions & 0 deletions inputJsonsFiles/DistributedConfig/dc_test_synt_1d_2c_2s_4r_4w.json
@@ -0,0 +1,124 @@
{
"nerlnetSettings": {
"frequency": "60",
"batchSize": "50"
},
"mainServer": {
"port": "8081",
"args": ""
},
"apiServer": {
"port": "8099",
"args": ""
},
"devices": [
{
"name": "pc1",
"ipv4": "10.0.0.11",
"entities": "c1,c2,r2,r1,r3,r4,s1,s2,apiServer,mainServer"
}
],
"routers": [
{
"name": "r1",
"port": "8086",
"policy": "0"
},
{
"name": "r2",
"port": "8087",
"policy": "0"
},
{
"name": "r3",
"port": "8088",
"policy": "0"
},
{
"name": "r4",
"port": "8089",
"policy": "0"
}
],
"sources": [
{
"name": "s1",
"port": "8085",
"frequency": "300",
"policy": "0",
"epochs": "1",
"type": "0"
},
{
"name": "s2",
"port": "8090",
"frequency": "300",
"policy": "0",
"epochs": "1",
"type": "0"
}
],
"clients": [
{
"name": "c1",
"port": "8083",
"workers": "w1,w2"
},
{
"name": "c2",
"port": "8084",
"workers": "w3,w4"
}
],
"workers": [
{
"name": "w1",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w2",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w3",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
},
{
"name": "w4",
"model_sha": "d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa"
}
],
"model_sha": {
"d8df752e0a2e8f01de8f66e9cec941cdbc65d144ecf90ab7713e69d65e7e82aa": {
"modelType": "0",
"_doc_modelType": " nn:0 | approximation:1 | classification:2 | forecasting:3 | image-classification:4 | text-classification:5 | text-generation:6 | auto-association:7 | autoencoder:8 | ae-classifier:9 |",
"layersSizes": "5,10,5,3,3",
"_doc_layersSizes": "List of postive integers [L0, L1, ..., LN]",
"layerTypesList": "1,3,3,3,5",
"_doc_LayerTypes": " Default:0 | Scaling:1 | CNN:2 | Perceptron:3 | Pooling:4 | Probabilistic:5 | LSTM:6 | Reccurrent:7 | Unscaling:8 |",
"layers_functions": "1,6,6,11,4",
"_doc_layers_functions_activation": " Threshold:1 | Sign:2 | Logistic:3 | Tanh:4 | Linear:5 | ReLU:6 | eLU:7 | SeLU:8 | Soft-plus:9 | Soft-sign:10 | Hard-sigmoid:11 |",
"_doc_layer_functions_pooling": " none:1 | Max:2 | Avg:3 |",
"_doc_layer_functions_probabilistic": " Binary:1 | Logistic:2 | Competitive:3 | Softmax:4 |",
"_doc_layer_functions_scaler": " none:1 | MinMax:2 | MeanStd:3 | STD:4 | Log:5 |",
"lossMethod": "2",
"_doc_lossMethod": " SSE:1 | MSE:2 | NSE:3 | MinkowskiE:4 | WSE:5 | CEE:6 |",
"lr": "0.01",
"_doc_lr": "Positve float",
"epochs": "1",
"_doc_epochs": "Positve Integer",
"optimizer": "5",
"_doc_optimizer": " GD:0 | CGD:1 | SGD:2 | QuasiNeuton:3 | LVM:4 | ADAM:5 |",
"optimizerArgs": "",
"_doc_optimizerArgs": "String",
"infraType": "0",
"_doc_infraType": " opennn:0 | wolfengine:1 |",
"distributedSystemType": "0",
"_doc_distributedSystemType": " none:0 | fedClientAvg:1 | fedServerAvg:2 |",
"distributedSystemArgs": "",
"_doc_distributedSystemArgs": "String",
"distributedSystemToken": "none",
"_doc_distributedSystemToken": "Token that associates distributed group of workers and parameter-server"
}
}
}
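The distributed-config file above ties every worker to a `model_sha` entry and assigns each entity its own port. A minimal sketch of a validator for those two invariants follows; the helper name `validate_dc` and the abbreviated embedded excerpt are illustrative only, not part of NErlNet.

```python
import json

# Abbreviated excerpt of the distributed-config schema shown above
# (shortened sha and entity lists for illustration).
dc = json.loads("""
{
  "routers":  [{"name": "r1", "port": "8086"}, {"name": "r2", "port": "8087"}],
  "clients":  [{"name": "c1", "port": "8083", "workers": "w1,w2"}],
  "workers":  [{"name": "w1", "model_sha": "d8df752e"},
               {"name": "w2", "model_sha": "d8df752e"}],
  "model_sha": {"d8df752e": {"modelType": "0"}}
}
""")

def validate_dc(dc):
    # Ports must be unique across routers and clients
    ports = [e["port"] for section in ("routers", "clients") for e in dc[section]]
    assert len(ports) == len(set(ports)), "port collision"
    # Every worker's model_sha must resolve to a model definition
    for w in dc["workers"]:
        assert w["model_sha"] in dc["model_sha"], "unknown model_sha"
    return True

print(validate_dc(dc))  # True
```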
64 changes: 64 additions & 0 deletions inputJsonsFiles/experimentsFlow/exp_new_arc.json
@@ -0,0 +1,64 @@
{

"experimentName": "synthetic_3_gausians",
"batchSize": 50,
"csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv",
"numOfFeatures": "5",
"numOfLabels": "3",
"headersNames": "Norm(0:1),Norm(4:1),Norm(10:3)",
"Phases":
[
{
"phaseName": "training1",
"phaseType": "training",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "0",
"numOfBatches": "10",
"workers": "w1,w2,w3,w4"
},
{
"sourceName": "s2",
"startingSample": "500",
"numOfBatches": "10",
"workers": "w1,w2,w3,w4"
}
]
},
{
"phaseName": "validation1",
"phaseType": "prediction",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "1000",
"numOfBatches": "5",
"workers": "w1,w2,w3,w4"
},
{
"sourceName": "s2",
"startingSample": "1250",
"numOfBatches": "5",
"workers": "w1,w2,w3,w4"
}
]
},
{
"phaseName": "prediction1",
"phaseType": "prediction",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "1500",
"numOfBatches": "5",
"workers": "w1,w2,w3,w4"
}
]

}
]
}
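Each `sourcePieces` entry in the experiment-flow JSON above selects a contiguous slice of the CSV: `numOfBatches` batches of the top-level `batchSize`, starting at `startingSample`. A hedged sketch of that arithmetic (the helper and the trimmed `flow` dict below are illustrative, not NErlNet code):

```python
# Compute the half-open sample range [start, end) each source piece covers,
# using the experiment's top-level batchSize. Field names follow the JSON
# shown above; note the numeric fields are JSON strings and need int().
def piece_ranges(flow):
    batch = flow["batchSize"]
    out = []
    for phase in flow["Phases"]:
        for piece in phase["sourcePieces"]:
            start = int(piece["startingSample"])
            end = start + int(piece["numOfBatches"]) * batch
            out.append((phase["phaseName"], piece["sourceName"], start, end))
    return out

# Trimmed excerpt of the flow above, for illustration
flow = {
    "batchSize": 50,
    "Phases": [
        {"phaseName": "training1", "phaseType": "training",
         "sourcePieces": [{"sourceName": "s1", "startingSample": "0", "numOfBatches": "10"}]},
        {"phaseName": "prediction1", "phaseType": "prediction",
         "sourcePieces": [{"sourceName": "s1", "startingSample": "1500", "numOfBatches": "5"}]},
    ],
}
print(piece_ranges(flow))  # training1 s1 covers [0, 500); prediction1 s1 covers [1500, 1750)
```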
@@ -0,0 +1,38 @@
{
"experimentName": "synthetic_3_gausians",
"batchSize": 50,
"csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv",
"numOfFeatures": "5",
"numOfLabels": "3",
"headersNames": "Norm(0:1),Norm(4:1),Norm(10:3)",
"Phases":
[
{
"phaseName": "training_phase",
"phaseType": "training",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "0",
"numOfBatches": "40",
"workers": "w1,w2,w3,w4"
}
]
},
{
"phaseName": "prediction_phase",
"phaseType": "prediction",
"sourcePieces":
[
{
"sourceName": "s1",
"startingSample": "2000",
"numOfBatches": "40",
"workers": "w1,w2,w3,w4"
}
]
}
]
}

1 change: 1 addition & 0 deletions src_cpp/common/common_definitions.h
@@ -7,5 +7,6 @@ namespace nerlnet
 #define DIM_Z_IDX 2

 #define NERLNIF_ATOM_STR "nerlnif"
+#define NERLNIF_NAN_ATOM_STR "nan"

 }
1 change: 1 addition & 0 deletions src_cpp/common/nerlWorkerFunc.h
@@ -71,6 +71,7 @@ static void parse_layer_sizes_str(std::string &layer_sizes_str, std::vector<int>
 case LAYER_TYPE_DEFAULT:
 case LAYER_TYPE_SCALING:
 case LAYER_TYPE_UNSCALING:
+case LAYER_TYPE_PROBABILISTIC:
 case SIMPLE_PARSING:{
     out_layer_sizes_params[i].dimx = std::stoi(layer_sizes_strs_vec[i]);
     break;
8 changes: 5 additions & 3 deletions src_cpp/opennnBridge/openNNnif.cpp
@@ -37,15 +37,17 @@ void* trainFun(void* arg)
 // Stop the timer and calculate the time took for training
 high_resolution_clock::time_point stop = high_resolution_clock::now();
 auto duration = duration_cast<microseconds>(stop - TrainNNptr->start_time);
+ERL_NIF_TERM loss_val_term;

 if(isnan(loss_val) )
 {
-    loss_val = -1.0;
+    loss_val_term = enif_make_atom(env , NERLNIF_NAN_ATOM_STR);
     cout << NERLNIF_PREFIX << "loss val = nan , setting NN weights to random values" <<std::endl;
     neural_network_ptr->set_parameters_random();
 }
-//cout << "returning training values"<<std::endl;
-ERL_NIF_TERM loss_val_term = enif_make_double(env, loss_val);
+else {
+    loss_val_term = enif_make_double(env, loss_val);
+}
 ERL_NIF_TERM train_time = enif_make_double(env, duration.count());
 ERL_NIF_TERM nerlnif_atom = enif_make_atom(env, NERLNIF_ATOM_STR);
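The `openNNnif.cpp` diff above changes how a NaN training loss is reported: instead of overwriting it with the magic value `-1.0`, the NIF now returns a dedicated `nan` atom and re-randomizes the network weights. A minimal Python sketch of that control flow (illustrative only; the function name is hypothetical and this is not the NIF code):

```python
import math

def report_loss(loss_val, reset_weights):
    # Mirrors the isnan(loss_val) branch in trainFun above
    if math.isnan(loss_val):
        reset_weights()   # mirrors neural_network_ptr->set_parameters_random()
        return "nan"      # mirrors enif_make_atom(env, NERLNIF_NAN_ATOM_STR)
    return loss_val       # mirrors enif_make_double(env, loss_val)

print(report_loss(float("nan"), lambda: None))  # nan
print(report_loss(0.25, lambda: None))          # 0.25
```

The benefit of the atom over a sentinel float is that the Erlang side can pattern-match on `nan` explicitly rather than treating a legal-looking negative loss as an error code.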
16 changes: 8 additions & 8 deletions src_erl/NerlnetApp/src/Bridge/nerlNIF.erl
@@ -2,7 +2,7 @@
-include_lib("kernel/include/logger.hrl").
-include("nerlTensor.hrl").

--export([init/0,nif_preload/0,get_active_models_ids_list/0, train_nif/3,update_nerlworker_train_params_nif/6,call_to_train/3,predict_nif/3,call_to_predict/6,get_weights_nif/1,printTensor/2]).
+-export([init/0,nif_preload/0,get_active_models_ids_list/0, train_nif/3,update_nerlworker_train_params_nif/6,call_to_train/5,predict_nif/3,call_to_predict/5,get_weights_nif/1,printTensor/2]).
-export([call_to_get_weights/2,call_to_set_weights/2]).
-export([decode_nif/2, nerltensor_binary_decode/2]).
-export([encode_nif/2, nerltensor_encode/5, nerltensor_conversion/2, get_all_binary_types/0, get_all_nerltensor_list_types/0]).
@@ -46,7 +46,7 @@ train_nif(_ModelID,_DataTensor,_Type) ->
update_nerlworker_train_params_nif(_ModelID,_LearningRate,_Epochs,_OptimizerType,_OptimizerArgs,_LossMethod) ->
exit(nif_library_not_loaded).

-call_to_train(ModelID, {DataTensor, Type}, WorkerPid)->
+call_to_train(ModelID, {DataTensor, Type}, WorkerPid , BatchID , SourceName)->
% io:format("before train ~n "),
% io:format("DataTensor= ~p~n ",[nerltensor_conversion({DataTensor, Type}, erl_float)]),
%{FakeTensor, Type} = nerltensor_conversion({[2.0,4.0,1.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0], erl_float}, float),
@@ -56,29 +56,29 @@ call_to_train(ModelID, {DataTensor, Type}, WorkerPid)->
{nerlnif , LossValue , TrainTime}->
% io:format("Ret= ~p~n ",[Ret]),
%io:format("WorkerPid,{loss, Ret}: ~p , ~p ~n ",[WorkerPid,{loss, Ret}]),
-        gen_statem:cast(WorkerPid,{loss, LossValue , TrainTime}) % TODO @Haran - please check what worker does with this Ret value
+        LossTensor = nerltensor_encode(1.0,1.0,1.0,[LossValue], erl_float), %% ALWAYS {[1.0,1.0,1.0,LOSS_VALUE] , <TYPE>}
+        gen_statem:cast(WorkerPid,{loss, LossTensor , TrainTime , BatchID , SourceName}) % TODO @Haran - please check what worker does with this Ret value
after ?TRAIN_TIMEOUT -> %TODO inspect this timeout
?LOG_ERROR("Worker train timeout reached! setting loss = -1~n "),
-        gen_statem:cast(WorkerPid,{loss, timeout}) %% Define train timeout state
+        gen_statem:cast(WorkerPid,{loss, timeout , SourceName}) %% Define train timeout state
end.
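Per the comment in `call_to_train` above, the raw loss float is now wrapped as a nerltensor whose first three entries encode a 1x1x1 shape before being cast to the worker. A hedged sketch of that list layout, assuming the flat `[DimX, DimY, DimZ, value...]` encoding the comment describes (the Python helper itself is illustrative, not NErlNet code):

```python
def encode_loss_tensor(loss_value):
    # ALWAYS [1.0, 1.0, 1.0, LOSS_VALUE]: three leading dimension entries
    # describing a 1x1x1 tensor, followed by the single data element
    return [1.0, 1.0, 1.0, float(loss_value)]

print(encode_loss_tensor(0.37))  # [1.0, 1.0, 1.0, 0.37]
```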

-call_to_predict(ModelID, BatchTensor, Type, WorkerPid,CSVname, BatchID)->
-    % io:format("satrting pred_nif~n"),
+call_to_predict(ModelID, {BatchTensor, Type}, WorkerPid, BatchID , SourceName)->
ok = predict_nif(ModelID, BatchTensor, Type),
receive

{nerlnif , PredNerlTensor, NewType, TimeTook}-> %% nerlnif atom means a message from the nif implementation
% io:format("pred_nif done~n"),
% {PredTen, _NewType} = nerltensor_conversion({PredNerlTensor, NewType}, erl_float),
% io:format("Pred returned: ~p~n", [PredNerlTensor]),
-        gen_statem:cast(WorkerPid,{predictRes,PredNerlTensor, NewType, TimeTook,CSVname, BatchID});
+        gen_statem:cast(WorkerPid,{predictRes,PredNerlTensor, NewType, TimeTook, BatchID , SourceName});
Error ->
?LOG_ERROR("received wrong prediction_nif format: ~p" ,[Error]),
throw("received wrong prediction_nif format")
after ?PREDICT_TIMEOUT ->
% worker miss predict batch TODO - inspect this code
?LOG_ERROR("Worker prediction timeout reached! ~n "),
-        gen_statem:cast(WorkerPid,{predictRes, nan, CSVname, BatchID})
+        gen_statem:cast(WorkerPid,{predictRes, nan, BatchID , SourceName})
end.

call_to_get_weights(ThisEts, ModelID)->