diff --git a/Demo/DemoConsoleApp/Examples/LibrasClassificationESNDesigner.cs b/Demo/DemoConsoleApp/Examples/LibrasClassificationESNDesigner.cs index c5ca164..a03b99a 100644 --- a/Demo/DemoConsoleApp/Examples/LibrasClassificationESNDesigner.cs +++ b/Demo/DemoConsoleApp/Examples/LibrasClassificationESNDesigner.cs @@ -1,7 +1,7 @@ using System; using RCNet.Neural.Activation; -using RCNet.Neural.Data.Coders.AnalogToSpiking; using RCNet.Neural.Data.Filter; +using RCNet.Neural.Network.NonRecurrent; using RCNet.Neural.Network.SM; using RCNet.Neural.Network.SM.Preprocessing; using RCNet.Neural.Network.SM.Preprocessing.Input; @@ -43,10 +43,10 @@ public void Run() new ExternalFieldSettings("coord_ordinate", new RealFeatureFilterSettings()) ); //Simplified readout layer configuration - ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateClassificationReadoutCfg(StateMachineDesigner.CreateSingleLayerRegrNet(new IdentitySettings(), 5, 400), - 0.0825d, - 1, + ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateClassificationReadoutCfg(new CrossvalidationSettings(0.0825d, CrossvalidationSettings.AutoFolds, 1), + StateMachineDesigner.CreateSingleLayerRegrNet(new IdentitySettings(), 5, 400), "Hand movement", + new NetworkClusterSecondLevelCompSettings(new CrossvalidationSettings(0.25d, CrossvalidationSettings.AutoFolds, 2), StateMachineDesigner.CreateMultiLayerRegrNet(10, new LeakyReLUSettings(), 1, 5, 400)), "curved swing", "horizontal swing", "vertical swing", diff --git a/Demo/DemoConsoleApp/Examples/LibrasClassificationLSMDesigner.cs b/Demo/DemoConsoleApp/Examples/LibrasClassificationLSMDesigner.cs index 4404c3b..d877b03 100644 --- a/Demo/DemoConsoleApp/Examples/LibrasClassificationLSMDesigner.cs +++ b/Demo/DemoConsoleApp/Examples/LibrasClassificationLSMDesigner.cs @@ -2,6 +2,7 @@ using RCNet.Neural.Activation; using RCNet.Neural.Data.Coders.AnalogToSpiking; using RCNet.Neural.Data.Filter; +using RCNet.Neural.Network.NonRecurrent; using RCNet.Neural.Network.SM; 
using RCNet.Neural.Network.SM.Preprocessing; using RCNet.Neural.Network.SM.Preprocessing.Input; @@ -95,10 +96,10 @@ public void Run(InputEncoder.SpikingInputEncodingRegime spikesEncodingRegime) } //Simplified readout layer configuration - ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateClassificationReadoutCfg(StateMachineDesigner.CreateMultiLayerRegrNet(10, new LeakyReLUSettings(), 2, 5, 400), - 0.0825d, - 1, + ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateClassificationReadoutCfg(new CrossvalidationSettings(0.0825d, 0, 1), + StateMachineDesigner.CreateMultiLayerRegrNet(10, new LeakyReLUSettings(), 2, 5, 400), "Hand movement", + null, "curved swing", "horizontal swing", "vertical swing", diff --git a/Demo/DemoConsoleApp/Examples/LibrasClassificationNPBypassedDesigner.cs b/Demo/DemoConsoleApp/Examples/LibrasClassificationNPBypassedDesigner.cs index d3e6ab2..cc7ba79 100644 --- a/Demo/DemoConsoleApp/Examples/LibrasClassificationNPBypassedDesigner.cs +++ b/Demo/DemoConsoleApp/Examples/LibrasClassificationNPBypassedDesigner.cs @@ -2,6 +2,7 @@ using RCNet.Neural.Activation; using RCNet.Neural.Data.Coders.AnalogToSpiking; using RCNet.Neural.Data.Filter; +using RCNet.Neural.Network.NonRecurrent; using RCNet.Neural.Network.SM; using RCNet.Neural.Network.SM.Preprocessing; using RCNet.Neural.Network.SM.Preprocessing.Input; @@ -37,10 +38,13 @@ public void Run() { //Create StateMachine configuration //Simplified readout layer configuration using FF-network having 2 hidden layers as the classifier - ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateClassificationReadoutCfg(StateMachineDesigner.CreateMultiLayerRegrNet(10, new LeakyReLUSettings(), 2, 5, 400), - 0.0825d, - 1, + ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateClassificationReadoutCfg(new CrossvalidationSettings(0.0825d, 0, 1), + StateMachineDesigner.CreateMultiLayerRegrNet(10, new LeakyReLUSettings(), 2, 5, 400), "Hand movement", + new 
NetworkClusterSecondLevelCompSettings(new CrossvalidationSettings(0.25d, CrossvalidationSettings.AutoFolds, 2), + StateMachineDesigner.CreateMultiLayerRegrNet(10, new LeakyReLUSettings(), 1, 5, 400), + TrainedNetworkCluster.SecondLevelCompMode.SecondLevelOutputOnly + ), "curved swing", "horizontal swing", "vertical swing", diff --git a/Demo/DemoConsoleApp/Examples/TTOOForecastDesigner.cs b/Demo/DemoConsoleApp/Examples/TTOOForecastDesigner.cs index caa2d13..80f02f8 100644 --- a/Demo/DemoConsoleApp/Examples/TTOOForecastDesigner.cs +++ b/Demo/DemoConsoleApp/Examples/TTOOForecastDesigner.cs @@ -2,6 +2,7 @@ using RCNet.Neural.Activation; using RCNet.Neural.Data.Coders.AnalogToSpiking; using RCNet.Neural.Data.Filter; +using RCNet.Neural.Network.NonRecurrent; using RCNet.Neural.Network.SM; using RCNet.Neural.Network.SM.Preprocessing.Input; using RCNet.Neural.Network.SM.Preprocessing.Neuron.Predictor; @@ -33,9 +34,9 @@ public void Run() new ExternalFieldSettings("Adj Close", new RealFeatureFilterSettings()) ); //Simplified readout layer configuration - ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateForecastReadoutCfg(StateMachineDesigner.CreateSingleLayerRegrNet(new IdentitySettings(), 2, 1000), - 0.1d, - 1, + ReadoutLayerSettings readoutCfg = StateMachineDesigner.CreateForecastReadoutCfg(new CrossvalidationSettings(0.1d, 0, 1), + StateMachineDesigner.CreateSingleLayerRegrNet(new IdentitySettings(), 2, 1000), + null, "High", "Low" ); diff --git a/Demo/DemoConsoleApp/Examples/TTOOForecastFromScratch.cs b/Demo/DemoConsoleApp/Examples/TTOOForecastFromScratch.cs index 681f930..1428b24 100644 --- a/Demo/DemoConsoleApp/Examples/TTOOForecastFromScratch.cs +++ b/Demo/DemoConsoleApp/Examples/TTOOForecastFromScratch.cs @@ -4,6 +4,7 @@ using RCNet.Neural.Activation; using RCNet.Neural.Data.Coders.AnalogToSpiking; using RCNet.Neural.Data.Filter; +using RCNet.Neural.Network.NonRecurrent; using RCNet.Neural.Network.NonRecurrent.FF; using RCNet.Neural.Network.SM; using 
RCNet.Neural.Network.SM.Preprocessing; @@ -238,10 +239,10 @@ NeuralPreprocessorSettings CreatePreprocessorCfg(string resInstName, string resS /// /// Creates readout layer configuration /// - /// Specifies what part of available data to be used as test data + /// Specifies what part of available data to be used as the fold data /// Number of regression attempts. Each readout network will try to learn numOfAttempts times /// Number of training epochs within an attempt - ReadoutLayerSettings CreateReadoutLayerCfg(double testDataRatio, int numOfAttempts, int numOfEpochs) + ReadoutLayerSettings CreateReadoutLayerCfg(double foldDataRatio, int numOfAttempts, int numOfEpochs) { //For each output field we will use prediction of two networks //First network having only Identity output neuron and associated the resilient back propagation trainer @@ -262,12 +263,10 @@ ReadoutLayerSettings CreateReadoutLayerCfg(double testDataRatio, int numOfAttemp ReadoutUnitSettings highReadoutUnitCfg = new ReadoutUnitSettings("High", new ForecastTaskSettings(new RealFeatureFilterSettings())); ReadoutUnitSettings lowReadoutUnitCfg = new ReadoutUnitSettings("Low", new ForecastTaskSettings(new RealFeatureFilterSettings())); //Create readout layer configuration - ReadoutLayerSettings readoutLayerCfg = new ReadoutLayerSettings(new ReadoutUnitsSettings(highReadoutUnitCfg, + ReadoutLayerSettings readoutLayerCfg = new ReadoutLayerSettings(new CrossvalidationSettings(foldDataRatio), + new ReadoutUnitsSettings(highReadoutUnitCfg, lowReadoutUnitCfg ), - testDataRatio, - ReadoutLayerSettings.AutoFolds, - ReadoutLayerSettings.DefaultRepetitions, defaultNetworksCfg ); return readoutLayerCfg; diff --git a/Demo/DemoConsoleApp/SMDemoSettings.xml b/Demo/DemoConsoleApp/SMDemoSettings.xml index b4a5799..d608e66 100644 --- a/Demo/DemoConsoleApp/SMDemoSettings.xml +++ b/Demo/DemoConsoleApp/SMDemoSettings.xml @@ -1,6 +1,7 @@ + - + @@ -484,13 +489,12 @@ - + - + - @@ -506,10 +510,17 @@ + + + + + + - + + @@ -528,9 
+539,15 @@ - + + + + + + + @@ -595,7 +612,8 @@ - + + @@ -691,7 +709,8 @@ - + + @@ -776,7 +795,8 @@ - + + @@ -803,7 +823,8 @@ - + + @@ -926,7 +947,8 @@ - + + @@ -1019,7 +1041,8 @@ - + + @@ -1109,7 +1132,8 @@ - + + @@ -1199,7 +1223,8 @@ - + + @@ -1340,7 +1365,8 @@ - + + @@ -1425,7 +1451,8 @@ - + + @@ -1596,7 +1623,8 @@ - + + @@ -1664,11 +1692,6 @@ - - - - - diff --git a/RCNet/Neural/Data/VectorBundle.cs b/RCNet/Neural/Data/VectorBundle.cs index e6a0a71..7fae08e 100644 --- a/RCNet/Neural/Data/VectorBundle.cs +++ b/RCNet/Neural/Data/VectorBundle.cs @@ -3,6 +3,7 @@ using RCNet.MathTools; using System; using System.Collections.Generic; +using System.Globalization; namespace RCNet.Neural.Data { @@ -12,6 +13,13 @@ namespace RCNet.Neural.Data [Serializable] public class VectorBundle { + //Constants + /// + /// Maximum ratio of one fold data + /// + public const double MaxRatioOfFoldData = 0.5d; + + //Attributes /// /// Collection of input vectors @@ -205,21 +213,70 @@ public static VectorBundle Load(CsvDataHolder csvData, int numOfOutputVariables) //Methods /// - /// Splits this bundle to a collection of smaller bundles. - /// Method expects length of the output vectors = 1. + /// Splits this bundle to a collection of smaller folds (sub-bundles) suitable for the cross-validation. + /// Remember that in case of binary output the length of the output vectors should be equal to 1, because + /// function keeps balanced ratios of 0 and 1 values in output vectors in each fold and takes into account + /// only the first value in the output vector. /// - /// Sub-bundle size - /// If specified and there is only one output value, method will keep balanced number of output values GE to binBorder in the each sub-bundle - /// Collection of extracted sub-bundles - public List Split(int subBundleSize, double binBorder = double.NaN) + /// Requested ratio of the samples constituting one fold (sub-bundle). 
+ /// If specified, method keeps balanced ratios of 0 and 1 values in each fold (sub-bundle). + /// Collection of created folds (sub-bundles) + public List CreateFolds(double foldDataRatio, double binBorder = double.NaN) { - int numOfBundles = OutputVectorCollection.Count / subBundleSize; - List bundleCollection = new List(numOfBundles); - if (!double.IsNaN(binBorder) && OutputVectorCollection[0].Length == 1) + if(OutputVectorCollection.Count < 2) + { + throw new InvalidOperationException($"Insufficient number of samples ({OutputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)})."); + } + List bundleCollection = new List(); + //Fold data ratio basic correction + if (foldDataRatio > MaxRatioOfFoldData) + { + foldDataRatio = MaxRatioOfFoldData; + } + //Initial fold size estimation + int foldSize = Math.Max(1, (int)Math.Round(OutputVectorCollection.Count * foldDataRatio, 0)); + //Initial number of folds + int numOfFolds = OutputVectorCollection.Count / foldSize; + //Folds creation + if (double.IsNaN(binBorder)) + { + //No binary output + int samplesPos = 0; + for (int bundleNum = 0; bundleNum < numOfFolds; bundleNum++) + { + VectorBundle bundle = new VectorBundle(); + for (int i = 0; i < foldSize && samplesPos < OutputVectorCollection.Count; i++) + { + bundle.InputVectorCollection.Add(InputVectorCollection[samplesPos]); + bundle.OutputVectorCollection.Add(OutputVectorCollection[samplesPos]); + ++samplesPos; + } + bundleCollection.Add(bundle); + } + //Remaining samples + for (int i = 0; i < OutputVectorCollection.Count - samplesPos; i++) + { + int bundleIdx = i % bundleCollection.Count; + bundleCollection[bundleIdx].InputVectorCollection.Add(InputVectorCollection[samplesPos + i]); + bundleCollection[bundleIdx].OutputVectorCollection.Add(OutputVectorCollection[samplesPos + i]); + } + + } + else { + //Binary output BinDistribution refBinDistr = new BinDistribution(binBorder); refBinDistr.Update(OutputVectorCollection, 0); - //Scan + int min01 = 
Math.Min(refBinDistr.NumOf[0], refBinDistr.NumOf[1]); + if(min01 < 2) + { + throw new InvalidOperationException($"Insufficient bin 0 or 1 samples (less than 2)."); + } + if(numOfFolds > min01) + { + numOfFolds = min01; + } + //Scan data int[] bin0SampleIdxs = new int[refBinDistr.NumOf[0]]; int bin0SamplesPos = 0; int[] bin1SampleIdxs = new int[refBinDistr.NumOf[1]]; @@ -235,21 +292,13 @@ public List Split(int subBundleSize, double binBorder = double.NaN bin0SampleIdxs[bin0SamplesPos++] = i; } } - //Division - int bundleBin0Count = Math.Max(1, refBinDistr.NumOf[0] / numOfBundles); - int bundleBin1Count = Math.Max(1, refBinDistr.NumOf[1] / numOfBundles); - if (bundleBin0Count * numOfBundles > bin0SampleIdxs.Length) - { - throw new InvalidOperationException($"Insufficient bin 0 samples"); - } - if (bundleBin1Count * numOfBundles > bin1SampleIdxs.Length) - { - throw new InvalidOperationException($"Insufficient bin 1 samples"); - } + //Determine distributions of 0 and 1 for one fold + int bundleBin0Count = Math.Max(1, refBinDistr.NumOf[0] / numOfFolds); + int bundleBin1Count = Math.Max(1, refBinDistr.NumOf[1] / numOfFolds); //Bundles creation bin0SamplesPos = 0; bin1SamplesPos = 0; - for (int bundleNum = 0; bundleNum < numOfBundles; bundleNum++) + for (int bundleNum = 0; bundleNum < numOfFolds; bundleNum++) { VectorBundle bundle = new VectorBundle(); //Bin 0 @@ -282,32 +331,11 @@ public List Split(int subBundleSize, double binBorder = double.NaN bundleCollection[bundleIdx].OutputVectorCollection.Add(OutputVectorCollection[bin1SampleIdxs[bin1SamplesPos + i]]); } } - else - { - //Bundles creation - int samplesPos = 0; - for (int bundleNum = 0; bundleNum < numOfBundles; bundleNum++) - { - VectorBundle bundle = new VectorBundle(); - for (int i = 0; i < subBundleSize && samplesPos < OutputVectorCollection.Count; i++) - { - bundle.InputVectorCollection.Add(InputVectorCollection[samplesPos]); - bundle.OutputVectorCollection.Add(OutputVectorCollection[samplesPos]); - 
++samplesPos; - } - bundleCollection.Add(bundle); - } - //Remaining samples - for (int i = 0; i < OutputVectorCollection.Count - samplesPos; i++) - { - int bundleIdx = i % bundleCollection.Count; - bundleCollection[bundleIdx].InputVectorCollection.Add(InputVectorCollection[samplesPos + i]); - bundleCollection[bundleIdx].OutputVectorCollection.Add(OutputVectorCollection[samplesPos + i]); - } - } + return bundleCollection; } + /// /// Adds data from given bundle into this bundle /// diff --git a/RCNet/Neural/Network/NonRecurrent/CrossvalidationSettings.cs b/RCNet/Neural/Network/NonRecurrent/CrossvalidationSettings.cs new file mode 100644 index 0000000..b023f4c --- /dev/null +++ b/RCNet/Neural/Network/NonRecurrent/CrossvalidationSettings.cs @@ -0,0 +1,201 @@ +using System; +using System.Globalization; +using System.Xml.Linq; + +namespace RCNet.Neural.Network.NonRecurrent +{ + /// + /// Configuration of the crossvalidation + /// + [Serializable] + public class CrossvalidationSettings : RCNetBaseSettings + { + //Constants + /// + /// Name of the associated xsd type + /// + public const string XsdTypeName = "CrossvalidationType"; + /// + /// Maximum allowed fold data ratio + /// + public const double MaxFoldDataRatio = 0.5d; + /// + /// Automatic number of folds (code) + /// + public const string AutoFoldsCode = "Auto"; + /// + /// Automatic number of folds (num) + /// + public const int AutoFolds = 0; + //Default values + /// + /// Default value of the parameter specifying required ratio of samples constituting one fold. Default value is 0.1. + /// + public const double DefaultFoldDataRatio = 0.1d; + /// + /// Default code value of the parameter specifying number of folds to be used. Default value is Auto (all available folds). + /// + public const string DefaultFoldsString = AutoFoldsCode; + /// + /// Default numeric value of the parameter specifying number of folds to be used. Default value is 0 (Auto - all available folds). 
+ /// + public const int DefaultFoldsNum = AutoFolds; + /// + /// Default value of the parameter defining how many times the generation of whole folds on shuffled data to be repeated. This parameter multiplies the number of networks in the cluster. Default value is 1. + /// + public const int DefaultRepetitions = 1; + + //Attribute properties + /// + /// Specifies required ratio of samples constituting one fold. + /// + public double FoldDataRatio { get; } + + /// + /// Specifies number of folds to be used. + /// + public int Folds { get; } + + /// + /// Defines how many times the generation of whole folds on shuffled data to be repeated. This parameter multiplies the number of networks in the cluster. + /// + public int Repetitions { get; } + + //Constructors + /// + /// Creates an unitialized instance + /// + /// Specifies required ratio of samples constituting one fold. + /// Specifies number of folds to be used. + /// Defines how many times the generation of whole folds on shuffled data to be repeated. This parameter multiplies the number of networks in the cluster. + public CrossvalidationSettings(double foldDataRatio = DefaultFoldDataRatio, + int folds = DefaultFoldsNum, + int repetitions = DefaultRepetitions + ) + { + FoldDataRatio = foldDataRatio; + Folds = folds; + Repetitions = repetitions; + Check(); + return; + } + + /// + /// Copy constructor + /// + /// Source instance + public CrossvalidationSettings(CrossvalidationSettings source) + : this(source.FoldDataRatio, source.Folds, source.Repetitions) + { + return; + } + + /// + /// Creates an initialized instance. + /// + /// Xml data containing the settings. + public CrossvalidationSettings(XElement elem) + { + //Validation + XElement settingsElem = Validate(elem, XsdTypeName); + //Parsing + FoldDataRatio = double.Parse(settingsElem.Attribute("foldDataRatio").Value, CultureInfo.InvariantCulture); + Folds = settingsElem.Attribute("folds").Value == DefaultFoldsString ? 
DefaultFoldsNum : int.Parse(settingsElem.Attribute("folds").Value, CultureInfo.InvariantCulture); + Repetitions = int.Parse(settingsElem.Attribute("repetitions").Value, CultureInfo.InvariantCulture); + Check(); + return; + } + + //Properties + /// + /// Checks if settings are default + /// + public bool IsDefaultFoldDataRatio { get { return (FoldDataRatio == DefaultFoldDataRatio); } } + /// + /// Checks if settings are default + /// + public bool IsDefaultFolds { get { return (Folds == DefaultFoldsNum); } } + /// + /// Checks if settings are default + /// + public bool IsDefaultRepetitions { get { return (Repetitions == DefaultRepetitions); } } + /// + /// Identifies settings containing only default values + /// + public override bool ContainsOnlyDefaults + { + get + { + return IsDefaultFoldDataRatio && + IsDefaultFolds && + IsDefaultRepetitions; + } + } + + //Methods + /// + /// Checks consistency + /// + protected override void Check() + { + if (FoldDataRatio <= 0 || FoldDataRatio > MaxFoldDataRatio) + { + throw new ArgumentException($"Invalid FoldDataRatio {FoldDataRatio.ToString(CultureInfo.InvariantCulture)}. TestDataRatio must be GT 0 and GE {MaxFoldDataRatio.ToString(CultureInfo.InvariantCulture)}.", "FoldDataRatio"); + } + if (Folds < 0) + { + throw new ArgumentException($"Invalid Folds {Folds.ToString(CultureInfo.InvariantCulture)}. Folds must be GE to 0 (0 means Auto folds).", "Folds"); + } + if (Repetitions < 1) + { + throw new ArgumentException($"Invalid Repetitions {Repetitions.ToString(CultureInfo.InvariantCulture)}. Repetitions must be GE to 1.", "Repetitions"); + } + return; + } + + /// + /// Creates the deep copy instance of this instance. + /// + public override RCNetBaseSettings DeepClone() + { + return new CrossvalidationSettings(this); + } + + /// + /// Generates xml element containing the settings. + /// + /// Name to be used as a name of the root element. 
+ /// Specifies whether to ommit optional nodes having set default values + /// XElement containing the settings + public override XElement GetXml(string rootElemName, bool suppressDefaults) + { + XElement rootElem = new XElement(rootElemName); + if (!suppressDefaults || !IsDefaultFoldDataRatio) + { + rootElem.Add(new XAttribute("foldDataRatio", FoldDataRatio.ToString(CultureInfo.InvariantCulture))); + } + if (!suppressDefaults || !IsDefaultFolds) + { + rootElem.Add(new XAttribute("folds", Folds == DefaultFoldsNum ? DefaultFoldsString : Folds.ToString(CultureInfo.InvariantCulture))); + } + if (!suppressDefaults || !IsDefaultRepetitions) + { + rootElem.Add(new XAttribute("repetitions", Repetitions.ToString(CultureInfo.InvariantCulture))); + } + Validate(rootElem, XsdTypeName); + return rootElem; + } + + /// + /// Generates default named xml element containing the settings. + /// + /// Specifies whether to ommit optional nodes having set default values + /// XElement containing the settings + public override XElement GetXml(bool suppressDefaults) + { + return GetXml("crossvalidation", suppressDefaults); + } + + }//CrossvalidationSettings + +}//Namespace diff --git a/RCNet/Neural/Network/NonRecurrent/NetworkClusterSecondLevelCompSettings.cs b/RCNet/Neural/Network/NonRecurrent/NetworkClusterSecondLevelCompSettings.cs index d6d410e..0cbdc68 100644 --- a/RCNet/Neural/Network/NonRecurrent/NetworkClusterSecondLevelCompSettings.cs +++ b/RCNet/Neural/Network/NonRecurrent/NetworkClusterSecondLevelCompSettings.cs @@ -17,37 +17,18 @@ public class NetworkClusterSecondLevelCompSettings : RCNetBaseSettings /// Name of the associated xsd type /// public const string XsdTypeName = "NetworkClusterSecondLevelCompType"; - /// - /// Maximum allowed test data ratio - /// - public const double MaxTestDataRatio = 0.5d; - /// - /// Automatic number of folds (code) - /// - public const string AutoFoldsCode = "Auto"; - /// - /// Automatic number of folds (num) - /// - public const int 
AutoFolds = 0; //Default values /// /// Default value of the parameter specifying computation mode of the cluster /// public const TrainedNetworkCluster.SecondLevelCompMode DefaultCompMode = TrainedNetworkCluster.SecondLevelCompMode.AveragedOutputs; + + //Attribute properties /// - /// Default value of the parameter specifying required test data ratio constituting one fold - /// - public const double DefaultTestDataRatio = 0.333333333d; - /// - /// Default number of folds - string code - /// - public const string DefaultFoldsString = AutoFoldsCode; - /// - /// Default number of folds - numeric code + /// Crossvalidation configuration /// - public const int DefaultFoldsNum = AutoFolds; + public CrossvalidationSettings CrossvalidationCfg { get; } - //Attribute properties /// /// 2nd level network configuration /// @@ -58,35 +39,22 @@ public class NetworkClusterSecondLevelCompSettings : RCNetBaseSettings /// public TrainedNetworkCluster.SecondLevelCompMode CompMode { get; } - /// - /// Required test data ratio constituting one fold - /// - public double TestDataRatio { get; } - - /// - /// Number of folds of 2nd level x-fold cross-validation computation - /// - public int Folds { get; } - //Constructors /// /// Creates an unitialized instance /// + /// Crossvalidation configuration /// 2nd level network configuration /// Computation mode - /// Reqired test data ratio constituing one fold - /// Number of folds of 2nd level x-fold cross-validation computation - public NetworkClusterSecondLevelCompSettings(FeedForwardNetworkSettings netCfg, - TrainedNetworkCluster.SecondLevelCompMode compMode = DefaultCompMode, - double testDataRatio = DefaultTestDataRatio, - int folds = DefaultFoldsNum - ) + public NetworkClusterSecondLevelCompSettings(CrossvalidationSettings crossvalidationCfg, + FeedForwardNetworkSettings netCfg, + TrainedNetworkCluster.SecondLevelCompMode compMode = DefaultCompMode + ) { + CrossvalidationCfg = (CrossvalidationSettings)crossvalidationCfg.DeepClone(); 
NetCfg = (FeedForwardNetworkSettings)netCfg.DeepClone(); CompMode = compMode; - TestDataRatio = testDataRatio; - Folds = folds; Check(); return; } @@ -96,7 +64,7 @@ public NetworkClusterSecondLevelCompSettings(FeedForwardNetworkSettings netCfg, /// /// Source instance public NetworkClusterSecondLevelCompSettings(NetworkClusterSecondLevelCompSettings source) - : this(source.NetCfg, source.CompMode, source.TestDataRatio, source.Folds) + : this(source.CrossvalidationCfg, source.NetCfg, source.CompMode) { return; } @@ -110,10 +78,9 @@ public NetworkClusterSecondLevelCompSettings(XElement elem) //Validation XElement settingsElem = Validate(elem, XsdTypeName); //Parsing + CrossvalidationCfg = new CrossvalidationSettings(settingsElem.Element("crossvalidation")); NetCfg = new FeedForwardNetworkSettings(settingsElem.Element("ff")); CompMode = (TrainedNetworkCluster.SecondLevelCompMode)Enum.Parse(typeof(TrainedNetworkCluster.SecondLevelCompMode), settingsElem.Attribute("mode").Value, true); - TestDataRatio = double.Parse(settingsElem.Attribute("testDataRatio").Value, CultureInfo.InvariantCulture); - Folds = settingsElem.Attribute("folds").Value == DefaultFoldsString ? 
DefaultFoldsNum : int.Parse(settingsElem.Attribute("folds").Value, CultureInfo.InvariantCulture); Check(); return; } @@ -124,14 +91,6 @@ public NetworkClusterSecondLevelCompSettings(XElement elem) /// public bool IsDefaultCompMode { get { return (CompMode == DefaultCompMode); } } /// - /// Checks if settings are default - /// - public bool IsDefaultTestDataRatio { get { return (TestDataRatio == DefaultTestDataRatio); } } - /// - /// Checks if settings are default - /// - public bool IsDefaultFolds { get { return (Folds == DefaultFoldsNum); } } - /// /// Identifies settings containing only default values /// public override bool ContainsOnlyDefaults { get { return false; } } @@ -142,14 +101,6 @@ public NetworkClusterSecondLevelCompSettings(XElement elem) /// protected override void Check() { - if (TestDataRatio <= 0 || TestDataRatio > MaxTestDataRatio) - { - throw new ArgumentException($"Invalid TestDataRatio {TestDataRatio.ToString(CultureInfo.InvariantCulture)}. TestDataRatio must be GT 0 and GE {MaxTestDataRatio.ToString(CultureInfo.InvariantCulture)}.", "TestDataRatio"); - } - if (Folds < 0) - { - throw new ArgumentException($"Invalid Folds {Folds.ToString(CultureInfo.InvariantCulture)}. 
Folds must be GE to 0 (0 means Auto folds).", "Folds"); - } return; } @@ -169,19 +120,11 @@ public override RCNetBaseSettings DeepClone() /// XElement containing the settings public override XElement GetXml(string rootElemName, bool suppressDefaults) { - XElement rootElem = new XElement(rootElemName, NetCfg.GetXml(suppressDefaults)); + XElement rootElem = new XElement(rootElemName, CrossvalidationCfg.GetXml(suppressDefaults), NetCfg.GetXml(suppressDefaults)); if (!suppressDefaults || !IsDefaultCompMode) { rootElem.Add(new XAttribute("mode", CompMode.ToString())); } - if (!suppressDefaults || !IsDefaultTestDataRatio) - { - rootElem.Add(new XAttribute("testDataRatio", TestDataRatio.ToString(CultureInfo.InvariantCulture))); - } - if (!suppressDefaults || !IsDefaultFolds) - { - rootElem.Add(new XAttribute("folds", Folds == DefaultFoldsNum ? DefaultFoldsString : Folds.ToString(CultureInfo.InvariantCulture))); - } Validate(rootElem, XsdTypeName); return rootElem; } diff --git a/RCNet/Neural/Network/NonRecurrent/TrainedNetworkBuilder.cs b/RCNet/Neural/Network/NonRecurrent/TrainedNetworkBuilder.cs index 2ba35fd..0fc0c30 100644 --- a/RCNet/Neural/Network/NonRecurrent/TrainedNetworkBuilder.cs +++ b/RCNet/Neural/Network/NonRecurrent/TrainedNetworkBuilder.cs @@ -187,9 +187,10 @@ private BuildingInstr DefaultRegressionController(BuildingState buildingState) public TrainedNetwork Build() { TrainedNetwork bestNetwork = null; - int lastImprovementEpoch = 0; - double lastImprovementCombinedPrecisionError = 0d; - double lastImprovementCombinedBinaryError = 0d; + int bestNetworkAttempt = 0; + int currNetworkLastImprovementEpoch = 0; + double currNetworkLastImprovementCombinedPrecisionError = 0d; + double currNetworkLastImprovementCombinedBinaryError = 0d; //Create network and trainer NonRecurrentNetUtils.CreateNetworkAndTrainer(_networkSettings, _trainingBundle.InputVectorCollection, @@ -240,14 +241,18 @@ out INonRecurrentNetworkTrainer trainer //Restart lastImprovementEpoch when new 
trainer's attempt started if (trainer.AttemptEpoch == 1) { - lastImprovementEpoch = trainer.AttemptEpoch; - lastImprovementCombinedPrecisionError = currNetwork.CombinedPrecisionError; - lastImprovementCombinedBinaryError = currNetwork.CombinedBinaryError; + currNetworkLastImprovementEpoch = trainer.AttemptEpoch; + currNetworkLastImprovementCombinedPrecisionError = currNetwork.CombinedPrecisionError; + currNetworkLastImprovementCombinedBinaryError = currNetwork.CombinedBinaryError; } //First initialization of the best network - bestNetwork = bestNetwork ?? currNetwork.DeepClone(); + if(bestNetwork == null) + { + bestNetwork = currNetwork.DeepClone(); + bestNetworkAttempt = trainer.Attempt; + } //RegrState instance - BuildingState regrState = new BuildingState(_networkName, _binBorder, _foldNum, _numOfFolds, _foldNetworkNum, _numOfFoldNetworks, trainer.Attempt, trainer.MaxAttempt, trainer.AttemptEpoch, trainer.MaxAttemptEpoch, currNetwork, bestNetwork, lastImprovementEpoch); + BuildingState regrState = new BuildingState(_networkName, _binBorder, _foldNum, _numOfFolds, _foldNetworkNum, _numOfFoldNetworks, trainer.Attempt, trainer.MaxAttempt, trainer.AttemptEpoch, trainer.MaxAttemptEpoch, currNetwork, currNetworkLastImprovementEpoch, bestNetwork, bestNetworkAttempt); //Call controller BuildingInstr instructions = _controller(regrState); //Better? 
@@ -256,15 +261,13 @@ out INonRecurrentNetworkTrainer trainer //Adopt current regression unit as a best one bestNetwork = currNetwork.DeepClone(); regrState.BestNetwork = bestNetwork; - lastImprovementEpoch = trainer.AttemptEpoch; - lastImprovementCombinedPrecisionError = currNetwork.CombinedPrecisionError; - lastImprovementCombinedBinaryError = currNetwork.CombinedBinaryError; + bestNetworkAttempt = trainer.Attempt; } - if (currNetwork.CombinedBinaryError < lastImprovementCombinedBinaryError || currNetwork.CombinedPrecisionError < lastImprovementCombinedPrecisionError) + if (currNetwork.CombinedBinaryError < currNetworkLastImprovementCombinedBinaryError || currNetwork.CombinedPrecisionError < currNetworkLastImprovementCombinedPrecisionError) { - lastImprovementEpoch = trainer.AttemptEpoch; - lastImprovementCombinedPrecisionError = currNetwork.CombinedPrecisionError; - lastImprovementCombinedBinaryError = currNetwork.CombinedBinaryError; + currNetworkLastImprovementEpoch = trainer.AttemptEpoch; + currNetworkLastImprovementCombinedPrecisionError = currNetwork.CombinedPrecisionError; + currNetworkLastImprovementCombinedBinaryError = currNetwork.CombinedBinaryError; } //Raise notification event RegressionEpochDone?.Invoke(regrState, instructions.CurrentIsBetter); @@ -341,13 +344,17 @@ public class BuildingState /// public TrainedNetwork CurrNetwork { get; } /// + /// Specifies when was lastly found an improvement of current network within the current attempt + /// + public int CurrNetworkLastImprovementEpoch { get; set; } + /// /// Contains the best network for now and related important error statistics. 
/// public TrainedNetwork BestNetwork { get; set; } /// - /// Specifies when was lastly found an improvement + /// Number of the attempt in which the best network was recognized /// - public int LastImprovementEpoch { get; set; } + public int BestNetworkAttempt { get; set; } /// /// Creates an initialized instance @@ -363,8 +370,9 @@ public class BuildingState /// Current epoch number within the current regression attempt /// Maximum number of epochs /// Current network and related important error statistics. + /// Specifies the epoch in which an improvement of the current network was last found within the current attempt. /// The best network for now and related important error statistics. - /// Specifies when was lastly found an improvement (bestNetwork=currNetwork). + /// Number of the attempt in which the best network was recognized. public BuildingState(string networkName, double binBorder, int foldNum, @@ -376,8 +384,9 @@ public BuildingState(string networkName, int epoch, int maxEpochs, TrainedNetwork currNetwork, + int currNetworkLastImprovementEpoch, TrainedNetwork bestNetwork, - int lastImprovementEpoch + int bestNetworkAttempt ) { NetworkName = networkName; @@ -391,8 +400,9 @@ int lastImprovementEpoch Epoch = epoch; MaxEpochs = maxEpochs; CurrNetwork = currNetwork; + CurrNetworkLastImprovementEpoch = currNetworkLastImprovementEpoch; BestNetwork = bestNetwork; - LastImprovementEpoch = lastImprovementEpoch; + BestNetworkAttempt = bestNetworkAttempt; return; } diff --git a/RCNet/Neural/Network/NonRecurrent/TrainedNetworkCluster.cs b/RCNet/Neural/Network/NonRecurrent/TrainedNetworkCluster.cs index 518cbd1..2ae7e6e 100644 --- a/RCNet/Neural/Network/NonRecurrent/TrainedNetworkCluster.cs +++ b/RCNet/Neural/Network/NonRecurrent/TrainedNetworkCluster.cs @@ -322,6 +322,26 @@ private double[] ComputeClusterMemberNetworks(double[] inputVector) return outputVector; } + /* + private TrainedNetworkBuilder.BuildingInstr RegressionController(TrainedNetworkBuilder.BuildingState buildingState) + 
{ + TrainedNetworkBuilder.BuildingInstr instructions = new TrainedNetworkBuilder.BuildingInstr + { + CurrentIsBetter = TrainedNetworkBuilder.IsBetter(buildingState.BinaryOutput, + buildingState.CurrNetwork, + buildingState.BestNetwork + ), + StopCurrentAttempt = (BinaryOutput && + buildingState.BestNetworkAttempt == buildingState.RegrAttemptNumber && + buildingState.BestNetwork.TrainingBinErrorStat.TotalErrStat.Sum == 0 && + buildingState.BestNetwork.TestingBinErrorStat.TotalErrStat.Sum == 0 && + buildingState.CurrNetwork.CombinedPrecisionError > buildingState.BestNetwork.CombinedPrecisionError + ) + }; + return instructions; + } + */ + /// /// Initialized second level networks /// @@ -339,54 +359,44 @@ private void InitSecondLevelNetworks(VectorBundle dataBundle) outputVector[0] = dataBundle.OutputVectorCollection[sampleIdx][0]; shuffledDataBundle.AddPair(inputVector, outputVector); } - shuffledDataBundle.Shuffle(new Random(0)); - //Split shuffled data into the folds - - //Test fold size - int testDataSetLength = (int)Math.Round(shuffledDataBundle.OutputVectorCollection.Count * _secondLevelCompCfg.TestDataRatio, 0); - if (testDataSetLength < 1) - { - throw new ArgumentException($"Num of resulting test samples is less than 1.", "TestDataRatio"); - } - int numOfFolds = _secondLevelCompCfg.Folds; - //Number of folds - if (numOfFolds <= 0) - { - //Auto setup - numOfFolds = shuffledDataBundle.OutputVectorCollection.Count / testDataSetLength; - } - List subBundleCollection = shuffledDataBundle.Split(testDataSetLength, BinBorder); - numOfFolds = Math.Min(numOfFolds, subBundleCollection.Count); - //Build trained network for each fold Random random = new Random(0); - for(int foldIdx = 0; foldIdx < numOfFolds; foldIdx++) + for (int repetitionIdx = 0; repetitionIdx < _secondLevelCompCfg.CrossvalidationCfg.Repetitions; repetitionIdx++) { - //Prepare training data bundle - VectorBundle trainingData = new VectorBundle(); - for (int bundleIdx = 0; bundleIdx < 
subBundleCollection.Count; bundleIdx++) + //Reshuffle the data + shuffledDataBundle.Shuffle(random); + //Split shuffled data into the folds + List subBundleCollection = shuffledDataBundle.CreateFolds(_secondLevelCompCfg.CrossvalidationCfg.FoldDataRatio, BinBorder); + int numOfFoldsToBeProcessed = Math.Min(_secondLevelCompCfg.CrossvalidationCfg.Folds <= 0 ? subBundleCollection.Count : _secondLevelCompCfg.CrossvalidationCfg.Folds, subBundleCollection.Count); + //Build trained network for each fold + for (int foldIdx = 0; foldIdx < numOfFoldsToBeProcessed; foldIdx++) { - if (bundleIdx != foldIdx) + //Prepare training data bundle + VectorBundle trainingData = new VectorBundle(); + for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++) { - trainingData.Add(subBundleCollection[bundleIdx]); + if (bundleIdx != foldIdx) + { + trainingData.Add(subBundleCollection[bundleIdx]); + } } + //Initialize network builder + TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(ClusterName + " - 2nd level net", + _secondLevelCompCfg.NetCfg, + (repetitionIdx * numOfFoldsToBeProcessed) + foldIdx + 1, + _secondLevelCompCfg.CrossvalidationCfg.Repetitions * numOfFoldsToBeProcessed, + 1, + 1, + trainingData, + subBundleCollection[foldIdx], + BinBorder, + random, + null + ); + //Register notification + netBuilder.RegressionEpochDone += OnRegressionEpochDone; + //Add trained network into the holder + _secondLevelNetCollection.Add(netBuilder.Build()); } - //Initialize network builder - TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(ClusterName + " - 2nd level net", - _secondLevelCompCfg.NetCfg, - foldIdx + 1, - numOfFolds, - 1, - 1, - trainingData, - subBundleCollection[foldIdx], - BinBorder, - random, - null - ); - //Register notification - netBuilder.RegressionEpochDone += OnRegressionEpochDone; - //Add trained network into the holder - _secondLevelNetCollection.Add(netBuilder.Build()); } //Init second level networks weights _secondLevelWeights = 
GetSoftmaxWeights(_secondLevelNetCollection, BinaryOutput); @@ -427,7 +437,7 @@ private double ComputeSecondLevelOutput(double[] memberOutputs, double firstLeve { secondLevelMemberOutputCollection[i] = _secondLevelNetCollection[i].Network.Compute(inputVector)[0]; } - return ComputeCompositeOutput(secondLevelMemberOutputCollection, _secondLevelWeights, BinaryOutput); + return ComputeCompositeOutput(secondLevelMemberOutputCollection, _secondLevelWeights, true); } /// diff --git a/RCNet/Neural/Network/NonRecurrent/TrainedNetworkClusterBuilder.cs b/RCNet/Neural/Network/NonRecurrent/TrainedNetworkClusterBuilder.cs index 85d72a6..d276f5d 100644 --- a/RCNet/Neural/Network/NonRecurrent/TrainedNetworkClusterBuilder.cs +++ b/RCNet/Neural/Network/NonRecurrent/TrainedNetworkClusterBuilder.cs @@ -15,17 +15,6 @@ namespace RCNet.Neural.Network.NonRecurrent /// public class TrainedNetworkClusterBuilder { - //Constants - /// - /// Maximum part of available samples useable for test purposes - /// - public const double MaxRatioOfTestData = 0.5d; - - /// - /// Minimum length of the test dataset - /// - public const int MinLengthOfTestDataset = 2; - //Events /// /// This informative event occurs every time the regression epoch is done @@ -89,48 +78,26 @@ private void OnRegressionEpochDone(TrainedNetworkBuilder.BuildingState buildingS /// /// Builds computation cluster of trained networks /// - /// Data to be used for training - /// Ratio of test data to be used (determines fold size) - /// Requested number of testing folds (determines number of cluster members). Value LE 0 causes automatic setup. - /// Defines how many times the generation of folds will be repeated. + /// Data to be used for training. Take into account that rows in this bundle may be in random order after the Build call. + /// Crossvalidation configuration /// Output feature filter to be used for output data denormalization. 
public TrainedNetworkCluster Build(VectorBundle dataBundle, - double testDataRatio, - int numOfFolds, - int repetitions, + CrossvalidationSettings crossvalidationCfg, FeatureFilterBase outputFeatureFilter ) { - //Test fold size - if (testDataRatio > MaxRatioOfTestData) - { - throw new ArgumentException($"Test data ratio is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "testingDataRatio"); - } - int testDataSetLength = (int)Math.Round(dataBundle.OutputVectorCollection.Count * testDataRatio, 0); - if (testDataSetLength < MinLengthOfTestDataset) - { - throw new ArgumentException($"Num of resulting test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "testingDataRatio"); - } - //Number of folds - if (numOfFolds <= 0) - { - //Auto setup - numOfFolds = dataBundle.OutputVectorCollection.Count / testDataSetLength; - } //Cluster of trained networks - int numOfMembers = numOfFolds * _networkSettingsCollection.Count * repetitions; TrainedNetworkCluster cluster = new TrainedNetworkCluster(_clusterName, _dataRange, _binBorder, _secondLevelCompCfg); //Member's training - int memberIdx = 0; - for (int cycle = 0; cycle < repetitions; cycle++) + for (int repetitionIdx = 0; repetitionIdx < crossvalidationCfg.Repetitions; repetitionIdx++) { //Data split to folds - List subBundleCollection = dataBundle.Split(testDataSetLength, _binBorder); - numOfFolds = Math.Min(numOfFolds, subBundleCollection.Count); - //Train collection of networks for each fold in the cluster. - for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++) + List subBundleCollection = dataBundle.CreateFolds(crossvalidationCfg.FoldDataRatio, _binBorder); + int numOfFoldsToBeProcessed = Math.Min(crossvalidationCfg.Folds <= 0 ? subBundleCollection.Count : crossvalidationCfg.Folds, subBundleCollection.Count); + //Train collection of networks for each processing fold. 
+ for (int foldIdx = 0; foldIdx < numOfFoldsToBeProcessed; foldIdx++) { - for (int netCfgIdx = 0; netCfgIdx < _networkSettingsCollection.Count; netCfgIdx++, memberIdx++) + for (int netCfgIdx = 0; netCfgIdx < _networkSettingsCollection.Count; netCfgIdx++) { //Prepare training data bundle VectorBundle trainingData = new VectorBundle(); @@ -143,8 +110,8 @@ FeatureFilterBase outputFeatureFilter } TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(_clusterName, _networkSettingsCollection[netCfgIdx], - (cycle * numOfFolds) + foldIdx + 1, - repetitions * numOfFolds, + (repetitionIdx * numOfFoldsToBeProcessed) + foldIdx + 1, + crossvalidationCfg.Repetitions * numOfFoldsToBeProcessed, netCfgIdx + 1, _networkSettingsCollection.Count, trainingData, @@ -160,12 +127,12 @@ FeatureFilterBase outputFeatureFilter cluster.AddMember(tn, subBundleCollection[foldIdx], outputFeatureFilter); }//netCfgIdx }//foldIdx - if (cycle < repetitions - 1) + if (repetitionIdx < crossvalidationCfg.Repetitions - 1) { - //Reshuffle data + //Reshuffle the data dataBundle.Shuffle(_rand); } - } + }//repetitionIdx //Make the cluster operable //Register notification cluster.RegressionEpochDone += OnRegressionEpochDone; diff --git a/RCNet/Neural/Network/SM/Readout/PredictorsMapper.cs b/RCNet/Neural/Network/SM/Readout/PredictorsMapper.cs index 68869c2..021e102 100644 --- a/RCNet/Neural/Network/SM/Readout/PredictorsMapper.cs +++ b/RCNet/Neural/Network/SM/Readout/PredictorsMapper.cs @@ -143,9 +143,9 @@ public double[] CreateVector(string readoutUnitName, double[] predictors) /// /// ReadoutUnit name /// Collection of available predictors - public List CreateVectorCollection(string readoutUnitName, List predictorsCollection) + public List CreateVectorCollection(string readoutUnitName, IEnumerable predictorsCollection) { - List vectorCollection = new List(predictorsCollection.Count); + List vectorCollection = new List(); ReadoutUnitMap rum = null; if (_mapCollection.ContainsKey(readoutUnitName)) { diff 
--git a/RCNet/Neural/Network/SM/Readout/ReadoutLayer.cs b/RCNet/Neural/Network/SM/Readout/ReadoutLayer.cs index 0179dc7..22f4e41 100644 --- a/RCNet/Neural/Network/SM/Readout/ReadoutLayer.cs +++ b/RCNet/Neural/Network/SM/Readout/ReadoutLayer.cs @@ -126,10 +126,17 @@ private void OnRegressionEpochDone(TrainedNetworkBuilder.BuildingState buildingS /// Collection of input predictors and associated desired output values /// Optional specific mapping of predictors to readout units /// Optional external regression controller + /// + /// Specifies random number generator's initial seek. + /// A value greater than or equal to 0 will always ensure the same initialization of the internal + /// random number generator and therefore also the same internal configuration each time the StateMachine to be instantiated. + /// A value less than 0 causes different internal configuration each time the ReadoutLayer to be Built. + /// /// Results of the regression public RegressionOverview Build(VectorBundle dataBundle, PredictorsMapper predictorsMapper = null, - TrainedNetworkBuilder.RegressionControllerDelegate controller = null + TrainedNetworkBuilder.RegressionControllerDelegate controller = null, + int randomizerSeek = 0 ) { //Basic checks @@ -199,7 +206,7 @@ public RegressionOverview Build(VectorBundle dataBundle, }); //Random object initialization - Random rand = new Random(0); + Random rand = (randomizerSeek < 0 ? new Random() : new Random(randomizerSeek)); //Create shuffled copy of the data VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection); shuffledData.Shuffle(rand); @@ -230,9 +237,7 @@ public RegressionOverview Build(VectorBundle dataBundle, //Build trained readout unit. 
Trained unit becomes to be the predicting cluster member _readoutUnitCollection[unitIdx] = new ReadoutUnit(unitIdx, readoutUnitBuilder.Build(readoutUnitDataBundle, - ReadoutLayerCfg.TestDataRatio, - ReadoutLayerCfg.Folds, - ReadoutLayerCfg.Repetitions, + ReadoutLayerCfg.CrossvalidationCfg, _outputFeatureFilterCollection[unitIdx] ) ); diff --git a/RCNet/Neural/Network/SM/Readout/ReadoutLayerSettings.cs b/RCNet/Neural/Network/SM/Readout/ReadoutLayerSettings.cs index 4c0bb9b..5f91369 100644 --- a/RCNet/Neural/Network/SM/Readout/ReadoutLayerSettings.cs +++ b/RCNet/Neural/Network/SM/Readout/ReadoutLayerSettings.cs @@ -1,9 +1,8 @@ -using RCNet.Neural.Network.NonRecurrent; -using System; +using System; using System.Collections.Generic; -using System.Globalization; using System.Linq; using System.Xml.Linq; +using RCNet.Neural.Network.NonRecurrent; namespace RCNet.Neural.Network.SM.Readout { @@ -18,52 +17,12 @@ public class ReadoutLayerSettings : RCNetBaseSettings /// Name of the associated xsd type /// public const string XsdTypeName = "ROutLayerType"; - /// - /// Maximum allowed test data ratio - /// - public const double MaxTestDataRatio = 0.5d; - /// - /// Automatic number of folds (code) - /// - public const string AutoFoldsCode = "Auto"; - /// - /// Automatic number of folds (num) - /// - public const int AutoFolds = 0; - //Default values - /// - /// Default number of folds - string code - /// - public const string DefaultFoldsString = AutoFoldsCode; - /// - /// Default number of folds - numeric code - /// - public const int DefaultFoldsNum = AutoFolds; - /// - /// Default number of repetitions - /// - public const int DefaultRepetitions = 1; - //Attribute properties /// - /// Specifies how big part of available samples will be used as testing samples during the training - /// - public double TestDataRatio { get; } - - /// - /// The x in the x-fold cross-validation - /// https://en.wikipedia.org/wiki/Cross-validation_(statistics) - /// Parameter has two options. 
- /// 0 - means auto setup to achieve full cross-validation if it is possible (related to specified TestDataRatio) - /// GT 0 - means exact number of the folds + /// Crossvalidation configuration /// - public int Folds { get; } - - /// - /// Defines how many times the generation of whole folds will be repeated - /// - public int Repetitions { get; } + public CrossvalidationSettings CrossvalidationCfg { get; } /// /// Task dependent networks settings to be applied when specific networks for readout unit are not specified @@ -71,7 +30,7 @@ public class ReadoutLayerSettings : RCNetBaseSettings public DefaultNetworksSettings DefaultNetworksCfg { get; } /// - /// Readout units settings + /// Readout units configuration /// public ReadoutUnitsSettings ReadoutUnitsCfg { get; } @@ -79,31 +38,17 @@ public class ReadoutLayerSettings : RCNetBaseSettings /// /// Creates an initialized instance. /// - /// Readout units settings - /// Specifies how big part of available samples will be used as testing samples during the training - /// The x in the x-fold cross-validation - /// Defines how many times the generation of whole folds will be repeated + /// Crossvalidation configuration + /// Readout units configuration /// Task dependent networks settings to be applied when specific networks for readout unit are not specified - public ReadoutLayerSettings(ReadoutUnitsSettings readoutUnitsCfg, - double testDataRatio, - int folds = DefaultFoldsNum, - int repetitions = DefaultRepetitions, + public ReadoutLayerSettings(CrossvalidationSettings crossvalidationCfg, + ReadoutUnitsSettings readoutUnitsCfg, DefaultNetworksSettings defaultNetworksCfg = null ) { - //Default settings - TestDataRatio = testDataRatio; - Folds = folds; - Repetitions = repetitions; + CrossvalidationCfg = (CrossvalidationSettings)crossvalidationCfg.DeepClone(); ReadoutUnitsCfg = (ReadoutUnitsSettings)readoutUnitsCfg.DeepClone(); - if (defaultNetworksCfg == null) - { - DefaultNetworksCfg = new 
DefaultNetworksSettings(); - } - else - { - DefaultNetworksCfg = (DefaultNetworksSettings)defaultNetworksCfg.DeepClone(); - } + DefaultNetworksCfg = defaultNetworksCfg == null ? new DefaultNetworksSettings() : (DefaultNetworksSettings)defaultNetworksCfg.DeepClone(); Check(); return; } @@ -113,7 +58,7 @@ public ReadoutLayerSettings(ReadoutUnitsSettings readoutUnitsCfg, /// /// Source instance public ReadoutLayerSettings(ReadoutLayerSettings source) - : this(source.ReadoutUnitsCfg, source.TestDataRatio, source.Folds, source.Repetitions, source.DefaultNetworksCfg) + : this(source.CrossvalidationCfg, source.ReadoutUnitsCfg, source.DefaultNetworksCfg) { return; } @@ -126,16 +71,15 @@ public ReadoutLayerSettings(ReadoutLayerSettings source) public ReadoutLayerSettings(XElement elem) { //Validation - XElement readoutLayerSettingsElem = Validate(elem, XsdTypeName); + XElement settingsElem = Validate(elem, XsdTypeName); //Parsing - TestDataRatio = double.Parse(readoutLayerSettingsElem.Attribute("testDataRatio").Value, CultureInfo.InvariantCulture); - Folds = readoutLayerSettingsElem.Attribute("folds").Value == DefaultFoldsString ? DefaultFoldsNum : int.Parse(readoutLayerSettingsElem.Attribute("folds").Value, CultureInfo.InvariantCulture); - Repetitions = int.Parse(readoutLayerSettingsElem.Attribute("repetitions").Value, CultureInfo.InvariantCulture); + //Crossvalidation + CrossvalidationCfg = new CrossvalidationSettings(settingsElem.Element("crossvalidation")); //Default networks settings - XElement defaultNetworksElem = readoutLayerSettingsElem.Elements("defaultNetworks").FirstOrDefault(); + XElement defaultNetworksElem = settingsElem.Elements("defaultNetworks").FirstOrDefault(); DefaultNetworksCfg = defaultNetworksElem == null ? 
new DefaultNetworksSettings() : new DefaultNetworksSettings(defaultNetworksElem); //Readout units - XElement readoutUnitsElem = readoutLayerSettingsElem.Elements("readoutUnits").First(); + XElement readoutUnitsElem = settingsElem.Elements("readoutUnits").First(); ReadoutUnitsCfg = new ReadoutUnitsSettings(readoutUnitsElem); Check(); return; @@ -153,16 +97,6 @@ public List OutputFieldNameCollection } } - /// - /// Checks if settings are default - /// - public bool IsDefaultFolds { get { return (Folds == DefaultFoldsNum); } } - - /// - /// Checks if settings are default - /// - public bool IsDefaultRepetitions { get { return (Repetitions == DefaultRepetitions); } } - /// /// Identifies settings containing only default values /// @@ -174,18 +108,6 @@ public List OutputFieldNameCollection /// protected override void Check() { - if (TestDataRatio <= 0 || TestDataRatio > MaxTestDataRatio) - { - throw new ArgumentException($"Invalid TestDataRatio {TestDataRatio.ToString(CultureInfo.InvariantCulture)}. TestDataRatio must be GT 0 and GE {MaxTestDataRatio.ToString(CultureInfo.InvariantCulture)}.", "TestDataRatio"); - } - if (Folds < 0) - { - throw new ArgumentException($"Invalid Folds {Folds.ToString(CultureInfo.InvariantCulture)}. Folds must be GE to 0 (0 means Auto folds).", "Folds"); - } - if (Repetitions < 1) - { - throw new ArgumentException($"Invalid Repetitions {Repetitions.ToString(CultureInfo.InvariantCulture)}. 
Repetitions must be GE to 1.", "Repetitions"); - } foreach (ReadoutUnitSettings rus in ReadoutUnitsCfg.ReadoutUnitCfgCollection) { if (rus.TaskCfg.NetworkCfgCollection.Count == 0) @@ -232,16 +154,7 @@ public override RCNetBaseSettings DeepClone() /// XElement containing the settings public override XElement GetXml(string rootElemName, bool suppressDefaults) { - XElement rootElem = new XElement(rootElemName); - rootElem.Add(new XAttribute("testDataRatio", TestDataRatio.ToString(CultureInfo.InvariantCulture))); - if (!suppressDefaults || !IsDefaultFolds) - { - rootElem.Add(new XAttribute("folds", Folds == DefaultFoldsNum ? DefaultFoldsString : Folds.ToString(CultureInfo.InvariantCulture))); - } - if (!suppressDefaults || !IsDefaultRepetitions) - { - rootElem.Add(new XAttribute("repetitions", Repetitions.ToString(CultureInfo.InvariantCulture))); - } + XElement rootElem = new XElement(rootElemName, CrossvalidationCfg.GetXml(suppressDefaults)); if (!DefaultNetworksCfg.ContainsOnlyDefaults) { rootElem.Add(DefaultNetworksCfg.GetXml(suppressDefaults)); diff --git a/RCNet/Neural/Network/SM/StateMachine.cs b/RCNet/Neural/Network/SM/StateMachine.cs index 15835a5..978f609 100644 --- a/RCNet/Neural/Network/SM/StateMachine.cs +++ b/RCNet/Neural/Network/SM/StateMachine.cs @@ -273,7 +273,7 @@ public TrainingResults Train(VectorBundle vectorBundle, TrainedNetworkBuilder.Re readoutInput = NP.InitializeAndPreprocessBundle(vectorBundle, out preprocessingOverview); } //Training of the readout layer - ReadoutLayer.RegressionOverview regressionOverview = RL.Build(readoutInput, BuildPredictorsMapper(), regressionController); + ReadoutLayer.RegressionOverview regressionOverview = RL.Build(readoutInput, BuildPredictorsMapper(), regressionController, Config.RandomizerSeek); //Return compact results return new TrainingResults(preprocessingOverview, regressionOverview); } diff --git a/RCNet/Neural/Network/SM/StateMachineDesigner.cs b/RCNet/Neural/Network/SM/StateMachineDesigner.cs index 
632492a..803276a 100644 --- a/RCNet/Neural/Network/SM/StateMachineDesigner.cs +++ b/RCNet/Neural/Network/SM/StateMachineDesigner.cs @@ -1,6 +1,7 @@ using RCNet.Extensions; using RCNet.Neural.Activation; using RCNet.Neural.Data.Filter; +using RCNet.Neural.Network.NonRecurrent; using RCNet.Neural.Network.NonRecurrent.FF; using RCNet.Neural.Network.SM.Preprocessing; using RCNet.Neural.Network.SM.Preprocessing.Input; @@ -204,13 +205,13 @@ public static FeedForwardNetworkSettings CreateMultiLayerRegrNet(int hiddenLayer /// /// Creates readout layer configuration to solve forecast task /// + /// Crossvalidation configuration /// FF network configuration to be associated with readout units - /// Specifies what part of available data to be used as test data - /// Number of repetitions of the folds regression + /// Optional configuration of the 2nd level computation of the network cluster /// Readout unit name - public static ReadoutLayerSettings CreateForecastReadoutCfg(FeedForwardNetworkSettings netCfg, - double testDataRatio, - int repetitions, + public static ReadoutLayerSettings CreateForecastReadoutCfg(CrossvalidationSettings crossvalidationCfg, + FeedForwardNetworkSettings netCfg, + NetworkClusterSecondLevelCompSettings cluster2ndLevelComputingCfg, params string[] unitName ) { @@ -223,10 +224,8 @@ params string[] unitName { unitCfgCollection.Add(new ReadoutUnitSettings(name, new ForecastTaskSettings(new RealFeatureFilterSettings()))); } - return new ReadoutLayerSettings(new ReadoutUnitsSettings(unitCfgCollection), - testDataRatio, - ReadoutLayerSettings.AutoFolds, - repetitions, + return new ReadoutLayerSettings(crossvalidationCfg, + new ReadoutUnitsSettings(unitCfgCollection, cluster2ndLevelComputingCfg), new DefaultNetworksSettings(null, new ForecastNetworksSettings(netCfg)) ); } @@ -234,15 +233,15 @@ params string[] unitName /// /// Creates readout layer configuration to solve classification task /// + /// Crossvalidation configuration /// FF network configuration 
to be associated with readout units - /// Specifies what part of available data to be used as test data - /// Number of repetitions of the folds regression /// Name of the "one winner" group encapsulating classification readout units + /// Optional configuration of the 2nd level computation of the network cluster /// Readout unit name - public static ReadoutLayerSettings CreateClassificationReadoutCfg(FeedForwardNetworkSettings netCfg, - double testDataRatio, - int repetitions, + public static ReadoutLayerSettings CreateClassificationReadoutCfg(CrossvalidationSettings crossvalidationCfg, + FeedForwardNetworkSettings netCfg, string oneWinnerGroupName, + NetworkClusterSecondLevelCompSettings cluster2ndLevelComputingCfg, params string[] unitName ) { @@ -255,10 +254,8 @@ params string[] unitName { unitCfgCollection.Add(new ReadoutUnitSettings(name, new ClassificationTaskSettings(oneWinnerGroupName))); } - return new ReadoutLayerSettings(new ReadoutUnitsSettings(unitCfgCollection), - testDataRatio, - ReadoutLayerSettings.AutoFolds, - repetitions, + return new ReadoutLayerSettings(crossvalidationCfg, + new ReadoutUnitsSettings(unitCfgCollection, cluster2ndLevelComputingCfg), new DefaultNetworksSettings(new ClassificationNetworksSettings(netCfg), null) ); } diff --git a/RCNet/RCNetTypes.xsd b/RCNet/RCNetTypes.xsd index b66fad0..2b4c1e9 100644 --- a/RCNet/RCNetTypes.xsd +++ b/RCNet/RCNetTypes.xsd @@ -1238,9 +1238,29 @@ + + + + + Required ratio of samples constituting one fold. Default value is 0.1. + + + + + Number of folds to be used. Default value is Auto (all available folds). + + + + + Defines how many times the generation of whole folds on shuffled data to be repeated. This parameter multiplies the number of networks in the cluster. Default value is 1. + + + + + @@ -1248,16 +1268,6 @@ Computation mode of the cluster. Default value is AveragedOutputs. - - - Required test data ratio constituting one fold. Default value is 0.333333333. 
- - - - - Number of folds of 2nd level x-fold cross-validation computation. Default value is Auto - - @@ -2566,6 +2576,7 @@ + @@ -2574,17 +2585,6 @@ - - - - Default value is Auto - - - - - Default value is 1 - - diff --git a/Readme.md b/Readme.md index f61616d..f2fb7c2 100644 --- a/Readme.md +++ b/Readme.md @@ -31,7 +31,7 @@ Demo application has internally implemented the xml validation so connection of #### Time-series classification results comparison SMDemoSettings.xml currently includes several classification problems from the: [Anthony Bagnall, Jason Lines, William Vickers and Eamonn Keogh, The UEA & UCR Time Series Classification Repository, www.timeseriesclassification.com](https://timeseriesclassification.com) -site and State Machine is surprisingly able to achieve better results then the best classification algorithms referenced on that website. +site and State Machine is surprisingly able to achieve results competitive with the best classification algorithms referenced on that website.
|Dataset|State Machine Accuracy|Best Ref. Accuracy|Best Ref. Algorithm| @@ -40,7 +40,7 @@ site and State Machine is surprisingly able to achieve better results then the b |[Worms](https://timeseriesclassification.com/description.php?Dataset=Worms)|83.12%|73.49%|BOSS| |[BeetleFly](https://timeseriesclassification.com/description.php?Dataset=BeetleFly)|100%|94.85%|BOSS| |[BirdChicken](https://timeseriesclassification.com/description.php?Dataset=BirdChicken)|100%|98.4%|BOSS| -|[ProximalPhalanx](https://timeseriesclassification.com/description.php?Dataset=ProximalPhalanxOutlineAgeGroup)|88.78%|88.09%|ST| +|[ProximalPhalanx](https://timeseriesclassification.com/description.php?Dataset=ProximalPhalanxOutlineAgeGroup)|87.8%|88.09%|ST| |[Yoga](https://timeseriesclassification.com/description.php?Dataset=Yoga)|91.27%|90.99%|BOSS| |[Libras](https://timeseriesclassification.com/description.php?Dataset=Libras)|92.78%|89.4%|DTWi| @@ -179,7 +179,7 @@ See the [wiki pages.](https://en.wikipedia.org/wiki/Biological_neuron_model) ||| |[TrainedNetwork](./RCNet/Neural/Network/NonRecurrent/TrainedNetwork.cs)|Encapsulates trained non-recurrent (Feed forward or Parallel perceptron) network and related error statistics.| |[TrainedNetworkBuilder](./RCNet/Neural/Network/NonRecurrent/TrainedNetworkBuilder.cs)|Builds single trained (Feed forward or Parallel perceptron) network. Performs training epochs and offers control to user to evaluate the network.| -|[TrainedNetworkCluster](./RCNet/Neural/Network/NonRecurrent/TrainedNetworkCluster.cs)|Encapsulates set of trained non-recurrent networks (cluster of TrainedNetwork instances) and related error statistics. Offers weighted cluster prediction and also publics all inner members sub-predictions.| +|[TrainedNetworkCluster](./RCNet/Neural/Network/NonRecurrent/TrainedNetworkCluster.cs)|Encapsulates set of trained non-recurrent networks (cluster of TrainedNetwork instances) and related error statistics. 
Offers sub-predictions of inner member networks, weighted prediction and also prediction of the 2nd level network.| |[TrainedNetworkClusterBuilder](./RCNet/Neural/Network/NonRecurrent/TrainedNetworkClusterBuilder.cs)|Builds cluster of trained networks based on x-fold cross validation approach. Each fold can have associated number of various networks.| ### State Machine Sub-Components