diff --git a/moa/src/main/java/moa/DoTask.java b/moa/src/main/java/moa/DoTask.java index ad7174fc2..09ef44113 100644 --- a/moa/src/main/java/moa/DoTask.java +++ b/moa/src/main/java/moa/DoTask.java @@ -58,16 +58,40 @@ public class DoTask { */ public static boolean isJavaVersionOK() { boolean isJavaVersionOK = true; - String version = System.getProperty("java.version"); - char major = version.charAt(0); - char minor = version.length() > 1 ? version.charAt(2): '0'; - if (major == '1' && minor < '6') { + String versionStr = System.getProperty("java.version"); + String[] parts; + double version; + if (versionStr.contains(".")) { + parts = versionStr.split("\\."); + } + else { + parts = new String[]{versionStr}; + } + if (parts.length == 1) { + try { + version = Double.parseDouble(parts[0]); + } + catch (Exception e) { + System.err.println("Unparsable Java version: " + versionStr); + return false; + } + } + else { + try { + version = Double.parseDouble(parts[0]) + Double.parseDouble(parts[1]) / 10; + } + catch (Exception e) { + System.err.println("Unparsable Java version: " + versionStr); + return false; + } + } + if (version < 1.8) { isJavaVersionOK = false; System.err.println(); System.err.println(Globals.getWorkbenchInfoString()); System.err.println(); - System.err.print("JDK 1.6.0 or higher is required to run MOA. "); - System.err.println("JDK version " + version + " found"); + System.err.print("Java 8 or higher is required to run MOA. "); + System.err.println("Java version " + versionStr + " found"); } return isJavaVersionOK; } @@ -82,11 +106,11 @@ public static boolean isJavaVersionOK() { */ public static boolean isWekaVersionOK() { Version version = new Version(); - if (version.isOlder("3.7.1")) { + if (version.isOlder("3.9.2")) { System.err.println(); System.err.println(Globals.getWorkbenchInfoString()); System.err.println(); - System.err.print("Weka 3.7.1 or higher is required to run MOA. "); + System.err.print("Weka 3.9.2 or higher is required to run MOA. 
"); System.err.println("Weka version " + Version.VERSION + " found"); return false; } else { diff --git a/moa/src/main/java/moa/classifiers/core/attributeclassobservers/HoeffdingNominalAttributeClassObserver.java b/moa/src/main/java/moa/classifiers/core/attributeclassobservers/HoeffdingNominalAttributeClassObserver.java new file mode 100644 index 000000000..69e4c9f8f --- /dev/null +++ b/moa/src/main/java/moa/classifiers/core/attributeclassobservers/HoeffdingNominalAttributeClassObserver.java @@ -0,0 +1,211 @@ +package moa.classifiers.core.attributeclassobservers; + +import moa.classifiers.core.AttributeSplitSuggestion; +import moa.classifiers.core.conditionaltests.NominalAttributeMultiwayTest; +import moa.classifiers.core.splitcriteria.SplitCriterion; +import moa.core.DoubleVector; +import moa.core.ObjectRepository; +import moa.options.AbstractOptionHandler; +import moa.tasks.TaskMonitor; +import moa.classifiers.core.conditionaltests.NominalAttributeBinaryTest; + +import java.io.Serializable; + +public class HoeffdingNominalAttributeClassObserver extends AbstractOptionHandler implements + DiscreteAttributeClassObserver { + + private static final long serialVersionUID = 1L; + + protected class Node implements Serializable { + + private static final long serialVersionUID = 1L; + + // The split point to use + public double cut_point; + + // statistics + public DoubleVector statistics = new DoubleVector(); + + + // Child node + public HoeffdingNominalAttributeClassObserver.Node child; + + + public Node(double val, double label) { + this.cut_point = val; + this.statistics.addToValue(0, 1); + this.statistics.addToValue(1, label); + this.statistics.addToValue(2, label * label); + } + + /** + * Insert a new value into the tree, updating both the sum of values and + * sum of squared values arrays + */ + public void insertValue(double val, double label) { + //System.out.println(val); + // If the new value equals the value stored in a node, update + // the node information + 
if (val == this.cut_point) { + this.statistics.addToValue(0, 1); + this.statistics.addToValue(1, label); + this.statistics.addToValue(2, label * label); + } // If the new value is less or greater than the value in a node, send the value down to the child node. + // If no left child exists, create one + else { + + if (this.child == null) { + this.child = new HoeffdingNominalAttributeClassObserver.Node(val, label); + numberOfPossibleValues += 1 ; + } else { + this.child.insertValue(val, label); + } + + } + } + } + + // Root node of the tree structure for this attribute + protected HoeffdingNominalAttributeClassObserver.Node root = null; + + // Global variables for use in the FindBestSplit algorithm + double sumOne; + double sumRest; + double sumSqOne; + double sumSqRest; + double countOne; + double countRest; + double sumTotal; + double sumSqTotal; + double count ; + boolean binaryOnly; + int numberOfPossibleValues ; + + public void observeAttributeClass(double attVal, int classVal, double weight) { + + + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(SplitCriterion criterion, double[] preSplitDist, int attIndex, boolean binaryOnly) { + + // Initialise global variables + sumOne = 0; + sumRest = 0; + sumSqOne = 0; + sumSqRest = 0; + countOne = 0; + countRest = 0; + sumTotal = preSplitDist[1]; + sumSqTotal = preSplitDist[2]; + count = preSplitDist[0]; + this.binaryOnly = binaryOnly; + if (binaryOnly) { + return searchForBestBinarySplitOption(this.root, null, criterion, attIndex); + } else { + return searchForBestMultiwaySplitOption(this.root, null, criterion, attIndex); + } + } + + /** + * Implementation of the FindBestSplit algorithm + */ + protected AttributeSplitSuggestion searchForBestBinarySplitOption(HoeffdingNominalAttributeClassObserver.Node currentNode, 
AttributeSplitSuggestion currentBestOption, SplitCriterion criterion, int attIndex) { + + + + // Return null if the current node is null or we have finished looking through all the possible splits + if (currentNode == null || countRest == 0.0) { + return currentBestOption; + } + + if (currentNode.child != null) { + currentBestOption = searchForBestBinarySplitOption(currentNode.child, currentBestOption, criterion, attIndex); + } + + sumOne = currentNode.statistics.getValue(1); + sumRest = sumTotal - sumOne; + sumSqOne = currentNode.statistics.getValue(2); + sumSqRest = sumSqTotal - sumSqOne; + countOne = currentNode.statistics.getValue(0); + countRest = count - countOne; + + double[][] postSplitDists = new double[][]{{countOne, sumOne, sumSqOne}, {countRest, sumRest, sumSqRest}}; + double[] preSplitDist = new double[]{(count), (sumTotal), (sumSqTotal)}; + double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); + + if ((currentBestOption == null) || (merit > currentBestOption.merit)) { + currentBestOption = new AttributeSplitSuggestion( + new NominalAttributeBinaryTest(attIndex, + (int) currentNode.cut_point), postSplitDists, merit); + + + } + + return currentBestOption; + + } + protected AttributeSplitSuggestion searchForBestMultiwaySplitOption(HoeffdingNominalAttributeClassObserver.Node currentNode, AttributeSplitSuggestion currentBestOption, SplitCriterion criterion, int attIndex) + { + + double[][] postSplitDists = new double[numberOfPossibleValues][3]; + for (int i = 0; i < numberOfPossibleValues; i++) + { + + if (currentNode == null || countRest == 0.0) { + return currentBestOption; + } + postSplitDists[i][0] = currentNode.statistics.getValue(0); + postSplitDists[i][1] = currentNode.statistics.getValue(1); + postSplitDists[i][2] = currentNode.statistics.getValue(2); + currentNode = currentNode.child ; + + } + double[] preSplitDist = new double[]{(count), (sumTotal), (sumSqTotal)}; + double merit = criterion.getMeritOfSplit(preSplitDist, 
postSplitDists); + if ((currentBestOption == null) || (merit > currentBestOption.merit)) { + currentBestOption = new AttributeSplitSuggestion( + new NominalAttributeMultiwayTest(attIndex), postSplitDists, merit); + } + + + + return currentBestOption; + + } + + + + + + + + public void observeAttributeTarget(double attVal, double classVal) { + if (Double.isNaN(attVal)) { //Instance.isMissingValue(attVal) + } else { + if (this.root == null) { + numberOfPossibleValues= 1 ; + this.root = new HoeffdingNominalAttributeClassObserver.Node(attVal, classVal); + } else { + this.root.insertValue(attVal, classVal); + } + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } +} diff --git a/moa/src/main/java/moa/classifiers/core/attributeclassobservers/HoeffdingNumericAttributeClassObserver.java b/moa/src/main/java/moa/classifiers/core/attributeclassobservers/HoeffdingNumericAttributeClassObserver.java new file mode 100644 index 000000000..f8fc2d810 --- /dev/null +++ b/moa/src/main/java/moa/classifiers/core/attributeclassobservers/HoeffdingNumericAttributeClassObserver.java @@ -0,0 +1,226 @@ +package moa.classifiers.core.attributeclassobservers; + +import moa.classifiers.core.AttributeSplitSuggestion; +import moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; +import moa.classifiers.core.splitcriteria.SplitCriterion; +import moa.core.DoubleVector; +import moa.core.ObjectRepository; +import moa.tasks.TaskMonitor; + +import java.io.Serializable; + +public class HoeffdingNumericAttributeClassObserver extends BinaryTreeNumericAttributeClassObserver implements NumericAttributeClassObserver { + private static final long serialVersionUID = 1L; + + protected class Node implements Serializable { + + private static final long serialVersionUID = 1L; + + // The split 
point to use + public double cut_point; + + // E-BST statistics + public DoubleVector leftStatistics = new DoubleVector(); + public DoubleVector rightStatistics = new DoubleVector(); + + // Child nodes + public HoeffdingNumericAttributeClassObserver.Node left; + public HoeffdingNumericAttributeClassObserver.Node right; + + public Node(double val, double label) { + this.cut_point = val; + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + } + + /** + * Insert a new value into the tree, updating both the sum of values and + * sum of squared values arrays + */ + public void insertValue(double val, double label) { + + // If the new value equals the value stored in a node, update + // the left (<=) node information + if (val == this.cut_point) { + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + } // If the new value is less than the value in a node, update the + // left distribution and send the value down to the left child node. + // If no left child exists, create one + else if (val <= this.cut_point) { + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + if (this.left == null) { + this.left = new HoeffdingNumericAttributeClassObserver.Node(val, label); + } else { + this.left.insertValue(val, label); + } + } // If the new value is greater than the value in a node, update the + // right (>) distribution and send the value down to the right child node. 
+ // If no right child exists, create one + else { // val > cut_point + this.rightStatistics.addToValue(0, 1); + this.rightStatistics.addToValue(1, label); + this.rightStatistics.addToValue(2, label * label); + if (this.right == null) { + this.right = new HoeffdingNumericAttributeClassObserver.Node(val, label); + } else { + this.right.insertValue(val, label); + } + } + } + } + + // Root node of the E-BST structure for this attribute + protected HoeffdingNumericAttributeClassObserver.Node root = null; + + // Global variables for use in the FindBestSplit algorithm + double sumTotalLeft; + double sumTotalRight; + double sumSqTotalLeft; + double sumSqTotalRight; + double countRightTotal; + double countLeftTotal; + + public void observeAttributeClass(double attVal, double classVal, double weight) { + if (Double.isNaN(attVal)) { //Instance.isMissingValue(attVal) + } else { + if (this.root == null) { + this.root = new HoeffdingNumericAttributeClassObserver.Node(attVal, classVal); + } else { + this.root.insertValue(attVal, classVal); + } + } + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(SplitCriterion criterion, double[] preSplitDist, int attIndex, boolean binaryOnly) { + + // Initialise global variables + sumTotalLeft = 0; + sumTotalRight = preSplitDist[1]; + sumSqTotalLeft = 0; + sumSqTotalRight = preSplitDist[2]; + countLeftTotal = 0; + countRightTotal = preSplitDist[0]; + return searchForBestSplitOption(this.root, null, criterion, attIndex); + } + + /** + * Implementation of the FindBestSplit algorithm from E.Ikonomovska et al. 
+ */ + protected AttributeSplitSuggestion searchForBestSplitOption(HoeffdingNumericAttributeClassObserver.Node currentNode, AttributeSplitSuggestion currentBestOption, SplitCriterion criterion, int attIndex) { + // Return null if the current node is null or we have finished looking through all the possible splits + if (currentNode == null || countRightTotal == 0.0) { + return currentBestOption; + } + + if (currentNode.left != null) { + currentBestOption = searchForBestSplitOption(currentNode.left, currentBestOption, criterion, attIndex); + } + + sumTotalLeft += currentNode.leftStatistics.getValue(1); + sumTotalRight -= currentNode.leftStatistics.getValue(1); + sumSqTotalLeft += currentNode.leftStatistics.getValue(2); + sumSqTotalRight -= currentNode.leftStatistics.getValue(2); + countLeftTotal += currentNode.leftStatistics.getValue(0); + countRightTotal -= currentNode.leftStatistics.getValue(0); + + double[][] postSplitDists = new double[][]{{countLeftTotal, sumTotalLeft, sumSqTotalLeft}, {countRightTotal, sumTotalRight, sumSqTotalRight}}; + double[] preSplitDist = new double[]{(countLeftTotal + countRightTotal), (sumTotalLeft + sumTotalRight), (sumSqTotalLeft + sumSqTotalRight)}; + double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); + + if ((currentBestOption == null) || (merit > currentBestOption.merit)) { + currentBestOption = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + currentNode.cut_point, true), postSplitDists, merit); + + } + + if (currentNode.right != null) { + currentBestOption = searchForBestSplitOption(currentNode.right, currentBestOption, criterion, attIndex); + } + sumTotalLeft -= currentNode.leftStatistics.getValue(1); + sumTotalRight += currentNode.leftStatistics.getValue(1); + sumSqTotalLeft -= currentNode.leftStatistics.getValue(2); + sumSqTotalRight += currentNode.leftStatistics.getValue(2); + countLeftTotal -= currentNode.leftStatistics.getValue(0); + countRightTotal += 
currentNode.leftStatistics.getValue(0); + + return currentBestOption; + } + + /** + * A method to remove all nodes in the E-BST in which it and all it's + * children represent 'bad' split points + */ + public void removeBadSplits(SplitCriterion criterion, double lastCheckRatio, double lastCheckSDR, double lastCheckE) { + removeBadSplitNodes(criterion, this.root, lastCheckRatio, lastCheckSDR, lastCheckE); + } + + /** + * Recursive method that first checks all of a node's children before + * deciding if it is 'bad' and may be removed + */ + private boolean removeBadSplitNodes(SplitCriterion criterion, HoeffdingNumericAttributeClassObserver.Node currentNode, double lastCheckRatio, double lastCheckSDR, double lastCheckE) { + boolean isBad = false; + + if (currentNode == null) { + return true; + } + + if (currentNode.left != null) { + isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); + } + + if (currentNode.right != null && isBad) { + isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); + } + + if (isBad) { + + double[][] postSplitDists = new double[][]{{currentNode.leftStatistics.getValue(0), currentNode.leftStatistics.getValue(1), currentNode.leftStatistics.getValue(2)}, {currentNode.rightStatistics.getValue(0), currentNode.rightStatistics.getValue(1), currentNode.rightStatistics.getValue(2)}}; + double[] preSplitDist = new double[]{(currentNode.leftStatistics.getValue(0) + currentNode.rightStatistics.getValue(0)), (currentNode.leftStatistics.getValue(1) + currentNode.rightStatistics.getValue(1)), (currentNode.leftStatistics.getValue(2) + currentNode.rightStatistics.getValue(2))}; + double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); + + if ((merit / lastCheckSDR) < (lastCheckRatio - (2 * lastCheckE))) { + currentNode = null; + return true; + } + } + + return false; + } + public void observeAttributeTarget(double attVal, double classVal) { + if 
(Double.isNaN(attVal)) { //Instance.isMissingValue(attVal) + } else { + if (this.root == null) { + this.root = new HoeffdingNumericAttributeClassObserver.Node(attVal, classVal); + } else { + this.root.insertValue(attVal, classVal); + } + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } +} + + diff --git a/moa/src/main/java/moa/classifiers/core/driftdetection/ADWINChangeDetector.java b/moa/src/main/java/moa/classifiers/core/driftdetection/ADWINChangeDetector.java index 154851096..4c94538f1 100644 --- a/moa/src/main/java/moa/classifiers/core/driftdetection/ADWINChangeDetector.java +++ b/moa/src/main/java/moa/classifiers/core/driftdetection/ADWINChangeDetector.java @@ -59,6 +59,7 @@ public void input(double inputValue) { @Override public void resetLearning() { adwin = new ADWIN((double) this.deltaAdwinOption.getValue()); + super.resetLearning(); } @Override diff --git a/moa/src/main/java/moa/classifiers/core/driftdetection/DDM.java b/moa/src/main/java/moa/classifiers/core/driftdetection/DDM.java index be0baac1e..8e3f5e9a3 100644 --- a/moa/src/main/java/moa/classifiers/core/driftdetection/DDM.java +++ b/moa/src/main/java/moa/classifiers/core/driftdetection/DDM.java @@ -19,6 +19,7 @@ package moa.classifiers.core.driftdetection; import com.github.javacliparser.IntOption; +import com.github.javacliparser.FloatOption; import moa.core.ObjectRepository; import moa.tasks.TaskMonitor; @@ -41,8 +42,19 @@ public class DDM extends AbstractChangeDetector { 'n', "The minimum number of instances before permitting detecting change.", 30, 0, Integer.MAX_VALUE); + + public FloatOption warningLevelOption = new FloatOption( + "warningLevel", 'w', "Warning Level.", + 2.0, 1.0, 4.0); + + public FloatOption outcontrolLevelOption = new FloatOption( + "outcontrolLevel", 'o', 
"Outcontrol Level.", + 3.0, 1.0, 5.0); + private int m_n; + private int minNumInstances; + private double m_p; private double m_s; @@ -53,6 +65,10 @@ public class DDM extends AbstractChangeDetector { private double m_smin; + private double warningLevel; + + private double outcontrolLevel; + public DDM() { resetLearning(); } @@ -65,6 +81,9 @@ public void resetLearning() { m_psmin = Double.MAX_VALUE; m_pmin = Double.MAX_VALUE; m_smin = Double.MAX_VALUE; + minNumInstances = this.minNumInstancesOption.getValue(); + warningLevel = this.warningLevelOption.getValue(); + outcontrolLevel = this.outcontrolLevelOption.getValue(); } @Override @@ -86,7 +105,7 @@ public void input(double prediction) { this.isWarningZone = false; this.delay = 0; - if (m_n < this.minNumInstancesOption.getValue()) { + if (m_n < minNumInstances) { return; } @@ -96,11 +115,11 @@ public void input(double prediction) { m_psmin = m_p + m_s; } - if (m_n > this.minNumInstancesOption.getValue() && m_p + m_s > m_pmin + 3 * m_smin) { + if (m_n > minNumInstances && m_p + m_s > m_pmin + outcontrolLevel * m_smin) { //System.out.println(m_p + ",D"); this.isChangeDetected = true; //resetLearning(); - } else if (m_p + m_s > m_pmin + 2 * m_smin) { + } else if (m_p + m_s > m_pmin + warningLevel * m_smin) { //System.out.println(m_p + ",W"); this.isWarningZone = true; } else { diff --git a/moa/src/main/java/moa/classifiers/core/driftdetection/RDDM.java b/moa/src/main/java/moa/classifiers/core/driftdetection/RDDM.java new file mode 100644 index 000000000..cb6f965a2 --- /dev/null +++ b/moa/src/main/java/moa/classifiers/core/driftdetection/RDDM.java @@ -0,0 +1,237 @@ +/* + * RDDM.java + * Copyright (C) 2016 Barros, Cabral, Goncalves, Santos + * @authors Roberto S. M. Barros (roberto@cin.ufpe.br) + * Danilo Cabral (danilocabral@danilocabral.com.br) + * Paulo M. Goncalves Jr. (paulomgj@gmail.com) + * Silas G. T. C. 
Santos (sgtcs@cin.ufpe.br) + * @version $Version: 1 $ + * + * Evolved from DDM.java + * Copyright (C) 2008 University of Waikato, Hamilton, New Zealand + * @author Manuel Baena (mbaena@lcc.uma.es) + * @version $Revision: 7 $ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Reactive Drift Detection Method (RDDM) + * published as: + * Roberto S. M. Barros, Danilo R. L. Cabral, Paulo M. Goncalves Jr., + * and Silas G. T. C. Santos: + * RDDM: Reactive Drift Detection Method. + * Expert Systems With Applications 90C (2017) pp. 344-355. 
/**
 * Change detector implementing the Reactive Drift Detection Method (RDDM).
 *
 * It tracks the running error rate (m_p) and its standard deviation (m_s) and
 * compares m_p + m_s against the recorded minimum, scaled by the warning and
 * drift levels. In addition it keeps the most recent predictions in a circular
 * buffer so that, after a drift, the statistics can be rebuilt from the
 * instances seen since the warning started, and it forces a reset when a
 * concept grows beyond maxSizeConcept or a warning lasts warnLimit instances.
 */
public class RDDM extends AbstractChangeDetector {
    private static final long serialVersionUID = -489867468386968209L;

    public IntOption minNumInstancesOption = new IntOption("minNumInstances",
            'n', "Minimum number of instances before monitoring changes.",
            129, 0, Integer.MAX_VALUE);

    public FloatOption warningLevelOption = new FloatOption("warningLevel",
            'w', "Warning Level.",
            1.773, 1.0, 4.0);

    public FloatOption driftLevelOption = new FloatOption("driftLevel",
            'o', "Drift Level.",
            2.258, 1.0, 5.0);

    public IntOption maxSizeConceptOption = new IntOption("maxSizeConcept",
            'x', "Maximum Size of Concept.",
            40000, 1, Integer.MAX_VALUE);

    public IntOption minSizeStableConceptOption = new IntOption("minSizeStableConcept",
            'y', "Minimum Size of Stable Concept.",
            7000, 1, 20000);

    public IntOption warnLimitOption = new IntOption("warnLimit",
            'z', "Warning Limit of instances",
            1400, 1, 20000);

    // Option values cached by initialize() so input() does not re-read the options.
    private int minNumInstances;
    private double warningLevel;
    private double driftLevel;
    private int maxSizeConcept;
    private int minSizeStableConcept;
    private int warnLimit;

    // Running statistics: number of instances, error rate, its standard
    // deviation, and the values recorded when m_p + m_s was at its minimum.
    private int m_n;
    private double m_p;
    private double m_s;
    private double m_pmin;
    private double m_smin;
    private double m_psmin;

    // Circular buffer of the latest predictions; its capacity is minSizeStableConcept.
    private byte [] storedPredictions;
    // Buffer bookkeeping; pos and i are only used as loop cursors in input().
    private int numStoredInstances, firstPos, lastPos, pos, i;
    // Instance number and buffer position at which the current warning started (-1 = none).
    private int lastWarnInst, lastWarnPos;
    // Total number of instances passed to input() since initialize().
    private int instNum;
    // Set when the next call to input() must rebuild the statistics from the buffer.
    private boolean rddmDrift;

    /**
     * Reads all option values, allocates an empty prediction buffer and resets
     * the statistics together with the recorded minima.
     */
    public void initialize() {
        minNumInstances = this.minNumInstancesOption.getValue();
        warningLevel = this.warningLevelOption.getValue();
        driftLevel = this.driftLevelOption.getValue();
        maxSizeConcept = this.maxSizeConceptOption.getValue();
        minSizeStableConcept = this.minSizeStableConceptOption.getValue();
        warnLimit = this.warnLimitOption.getValue();
        storedPredictions = new byte[minSizeStableConcept];
        numStoredInstances = 0;
        firstPos = 0;
        lastPos = -1; // This means storedPredictions is empty.
        lastWarnPos = -1;
        lastWarnInst = -1;
        instNum = 0;
        rddmDrift = false;
        this.isChangeDetected = false;

        resetLearning();
        // isChangeDetected was just cleared, so resetLearning() left the minima
        // untouched; reset them explicitly.
        m_pmin = Double.MAX_VALUE;
        m_smin = Double.MAX_VALUE;
        m_psmin = Double.MAX_VALUE;
    }

    /**
     * Restarts the running statistics. The recorded minima are cleared only
     * when a change is currently flagged; otherwise they are kept.
     */
    @Override
    public void resetLearning() {
        m_n = 1;
        m_p = 1;
        m_s = 0;
        if (this.isChangeDetected) {
            m_pmin = Double.MAX_VALUE;
            m_smin = Double.MAX_VALUE;
            m_psmin = Double.MAX_VALUE;
        }
    }

    /**
     * Feeds one prediction outcome to the detector and updates estimation,
     * isWarningZone and isChangeDetected.
     *
     * @param prediction outcome of the latest prediction (see the inline note
     *                   on the encoding used by MOA)
     */
    @Override
    public void input(double prediction) { // In MOA, 1.0=false, 0.0=true.
        if (!this.isInitialized) {
            initialize();
            this.isInitialized = true;
        }
        // A drift (or forced reset) was raised by the previous call: restart
        // the statistics and rebuild them from the buffered predictions.
        if (rddmDrift) {
            resetLearning();
            if (lastWarnPos != -1) {
                // Keep only the instances seen since the warning started.
                firstPos = lastWarnPos;
                numStoredInstances = lastPos - firstPos + 1;
                if (numStoredInstances <= 0) {
                    // The window wraps around the end of the circular buffer.
                    numStoredInstances += minSizeStableConcept;
                }
            }

            pos = firstPos;
            for (i = 0; i < numStoredInstances; i++) {
                m_p = m_p + (storedPredictions[pos] - m_p) / m_n;
                m_s = Math.sqrt(m_p * (1 - m_p) / m_n);
                // Minima are re-learned during the replay only after a flagged change.
                if (this.isChangeDetected && (m_n > minNumInstances) && (m_p + m_s < m_psmin)) {
                    m_pmin = m_p;
                    m_smin = m_s;
                    m_psmin = m_p + m_s;
                }
                m_n++;
                pos = (pos + 1) % minSizeStableConcept;
            }

            lastWarnPos = -1;
            lastWarnInst = -1;
            rddmDrift = false;
            this.isChangeDetected = false;
        }

        lastPos = (lastPos + 1) % minSizeStableConcept; // Adds prediction at the end of the window.
        storedPredictions[lastPos] = (byte) prediction;
        if (numStoredInstances < minSizeStableConcept) { // The window grows.
            numStoredInstances++;
        } else { // The window is full.
            firstPos = (firstPos + 1) % minSizeStableConcept; // Start of the window moves.
            // The slot that marked the start of the warning has just been overwritten.
            if (lastWarnPos == lastPos) {
                lastWarnPos = -1;
            }
        }

        // Incremental update of the error rate and its standard deviation.
        m_p = m_p + (prediction - m_p) / m_n;
        m_s = Math.sqrt(m_p * (1 - m_p) / m_n);

        instNum++;
        m_n++;
        this.estimation = m_p;
        this.isWarningZone = false;

        // Too few instances in the current concept to test for changes.
        if (m_n <= minNumInstances) {
            return;
        }

        // Track the lowest m_p + m_s seen so far.
        if (m_p + m_s < m_psmin) {
            m_pmin = m_p;
            m_smin = m_s;
            m_psmin = m_p + m_s;
        }

        if (m_p + m_s > m_pmin + driftLevel * m_smin) { // DDM Drift
            this.isChangeDetected = true;
            rddmDrift = true;
            if (lastWarnInst == -1) { // DDM Drift without previous warning
                // Restart the window at the current instance.
                firstPos = lastPos;
                numStoredInstances = 1;
            }
            return;
        }

        if (m_p + m_s > m_pmin + warningLevel * m_smin) { // Warning Level
            // Warning level for warnLimit consecutive instances will force drifts
            if ((lastWarnInst != -1) && (lastWarnInst + warnLimit <= instNum)) {
                this.isChangeDetected = true;
                rddmDrift = true;
                firstPos = lastPos;
                numStoredInstances = 1;
                lastWarnPos = -1;
                lastWarnInst = -1;
                return;
            }
            // Warning Zone
            this.isWarningZone = true;
            if (lastWarnInst == -1) {
                // Remember where this warning period started.
                lastWarnInst = instNum;
                lastWarnPos = lastPos;
            }
        } else { // Neither DDM Drift nor Warning - disregard false warnings
            lastWarnInst = -1;
            lastWarnPos = -1;
        }
        // The concept has grown too large: schedule a rebuild from the buffer
        // without flagging a change (isChangeDetected is left untouched).
        if (m_n > maxSizeConcept && (!isWarningZone)) { // RDDM Drift
            rddmDrift = true;
        }
    }

    @Override
    public void getDescription(StringBuilder sb, int indent) {
        // TODO Auto-generated method stub
    }

    @Override
    protected void prepareForUseImpl(TaskMonitor monitor,
            ObjectRepository repository) {
        // TODO Auto-generated method stub
    }
}
instance.classIndex()) + m_bias; double y; - double yhat; double dldz; if (instance.classAttribute().isNominal()) { y = (instance.classValue() == 0) ? 0 : 1; if (m_loss == LOGLOSS) { - yhat = 1.0 / (1.0 + Math.exp(-z)); - dldz = (yhat - y) * (yhat * (1.0 - yhat)); + double yhat = 1.0 / (1.0 + Math.exp(-z)); + dldz = (yhat - y); } else { y = y * 2 - 1; - yhat = z > 0.0 ? 1.0 : -1.0; if(y * z < 1.0) { @@ -157,7 +155,6 @@ public void trainOnInstanceImpl(Instance instance) { } else { y = instance.classValue(); - yhat = z; dldz = z - y; } diff --git a/moa/src/main/java/moa/classifiers/meta/ADOB.java b/moa/src/main/java/moa/classifiers/meta/ADOB.java new file mode 100644 index 000000000..e430c1f58 --- /dev/null +++ b/moa/src/main/java/moa/classifiers/meta/ADOB.java @@ -0,0 +1,208 @@ +/* + * ADOB.java + * Copyright (C) 2014 Santos, Goncalves, Barros + * @author Silas G. T. C. Santos (sgtcs@cin.ufpe.br) + * Paulo M. Goncalves Jr. (paulomgj@gmail.com) + * Roberto S. M. Barros (roberto@cin.ufpe.br) + * @version $Version: 1 $ + * + * Evolved from OzaBoost.java + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) + * @version $Revision: 7 $ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package moa.classifiers.meta; + +import moa.classifiers.MultiClassClassifier; +import moa.classifiers.AbstractClassifier; +import moa.classifiers.Classifier; +import com.yahoo.labs.samoa.instances.Instance; + +import moa.core.DoubleVector; +import moa.core.Measurement; +import moa.core.MiscUtils; +import moa.options.ClassOption; +import com.github.javacliparser.FlagOption; +import com.github.javacliparser.IntOption; + +/** + * Adaptable Diversity-based Online Boosting (ADOB) is a modified version + * of the online boosting, as proposed by Oza and Russell, which is aimed + * at speeding up the experts recovery after concept drifts. + * + * published as: + * Silas G. T. C. Santos, Paulo M. Goncalves Jr., Geyson D. S. Silva, + * and Roberto S. M. Barros: + * Speeding Up Recovery from Concept Drifts. + * In book: Machine Learning and Knowledge Discovery in Databases, + * ECML/PKDD 2014, Part III, LNCS 8726, pp. 179-194. 09/2014. + * DOI: 10.1007/978-3-662-44845-8_12 + */ + +public class ADOB extends AbstractClassifier implements MultiClassClassifier { + + private static final long serialVersionUID = 1L; + + @Override + public String getPurposeString() { + return "Adaptable Diversity-based Online Boosting (ADOB)"; + } + + public ClassOption baseLearnerOption = new ClassOption("baseLearner", 'l', + "Classifier to train.", Classifier.class, + "drift.SingleClassifierDrift -l trees.HoeffdingTree -d ADWINChangeDetector"); + + public IntOption ensembleSizeOption = new IntOption("ensembleSize", 's', + "The number of models to boost.", 10, 1, Integer.MAX_VALUE); + + public FlagOption pureBoostOption = new FlagOption("pureBoost", 'p', + "Boost with weights only; no poisson."); + + protected Classifier[] ensemble; + + protected int[] orderPosition; + + protected double[] scms; + + protected double[] swms; + + @Override + public void resetLearningImpl() { + this.ensemble = new Classifier[this.ensembleSizeOption.getValue()]; + this.orderPosition = new 
int[this.ensemble.length]; + Classifier baseLearner = (Classifier) getPreparedClassOption(this.baseLearnerOption); + baseLearner.resetLearning(); + for (int i = 0; i < this.ensemble.length; i++) { + this.ensemble[i] = baseLearner.copy(); + this.orderPosition[i] = i; + } + this.scms = new double[this.ensemble.length]; + this.swms = new double[this.ensemble.length]; + } + + @Override + public void trainOnInstanceImpl(Instance inst) { + // Calculates current accuracy of experts + double[] acc = new double[this.ensemble.length]; + for ( int i=0; i=0 && acc[j] 0.0) { + Instance weightedInst = (Instance) inst.copy(); + weightedInst.setWeight(inst.weight() * k); + this.ensemble[pos].trainOnInstance(weightedInst); + } + + // Increases or decreases lambda based on the prediction of instance + if (this.ensemble[pos].correctlyClassifies(inst)) { + this.scms[pos] += lambda_d; + lambda_d *= this.trainingWeightSeenByModel / (2 * this.scms[pos]); + correct = true; + } else { + this.swms[pos] += lambda_d; + lambda_d *= this.trainingWeightSeenByModel / (2 * this.swms[pos]); + correct = false; + } + } + } + + protected double getEnsembleMemberWeight(int i) { + if ( this.scms[i]>0.0 && this.swms[i]>0.0 ) { + double em = this.swms[i] / (this.scms[i] + this.swms[i]); + if (em <= 0.5) { + double Bm = em / (1.0 - em); + return Math.log(1.0 / Bm); + } + } + return 0.0; + } + + public double[] getVotesForInstance(Instance inst) { + DoubleVector combinedVote = new DoubleVector(); + for (int i = 0; i < this.ensemble.length; i++) { + double memberWeight = getEnsembleMemberWeight(i); + if (memberWeight > 0.0) { + DoubleVector vote = new DoubleVector(this.ensemble[i].getVotesForInstance(inst)); + if (vote.sumOfValues() > 0.0) { + vote.normalize(); + vote.scaleValues(memberWeight); + combinedVote.addValues(vote); + } + } else { + break; + } + } + + return combinedVote.getArrayRef(); + } + + public boolean isRandomizable() { + return true; + } + + @Override + public void 
getModelDescription(StringBuilder out, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + return new Measurement[]{new Measurement("ensemble size", + this.ensemble != null ? this.ensemble.length : 0)}; + } + + @Override + public Classifier[] getSubClassifiers() { + return this.ensemble.clone(); + } +} diff --git a/moa/src/main/java/moa/classifiers/meta/BOLE.java b/moa/src/main/java/moa/classifiers/meta/BOLE.java new file mode 100644 index 000000000..afb034c9b --- /dev/null +++ b/moa/src/main/java/moa/classifiers/meta/BOLE.java @@ -0,0 +1,224 @@ +/* + * BOLE.java + * Copyright (C) 2015 Santos, Barros + * @authors Silas G. T. C. Santos (sgtcs@cin.ufpe.br) + * Roberto Souto Maior de Barros (roberto@cin.ufpe.br) + * @version $Version: 1 $ + * + * Evolved from ADOB.java + * Copyright (C) 2014 Santos, Goncalves, Barros + * @author Silas G. T. C. Santos (sgtcs@cin.ufpe.br) + * Paulo M. Goncalves Jr. (paulomgj@gmail.com) + * Roberto S. M. Barros (roberto@cin.ufpe.br) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Boosting-like Online Learning Ensemble (BOLE). + * + * published as: + * Roberto Souto Maior de Barros, Silas Garrido T. de Carvalho Santos, + * and Paulo Mauricio Goncalves Jr.: + * A Boosting-like Online Learning Ensemble. + * In Proceedings of IEEE International Joint Conference + * on Neural Networks (IJCNN), Vancouver, Canada, 2016. 
+ * DOI: 10.1109/IJCNN.2016.7727427 + */ + +package moa.classifiers.meta; + +import moa.classifiers.MultiClassClassifier; +import moa.classifiers.AbstractClassifier; +import moa.classifiers.Classifier; +import moa.core.DoubleVector; +import moa.core.Measurement; +import moa.core.MiscUtils; +import moa.options.ClassOption; +import com.github.javacliparser.FlagOption; +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.instances.Instance; + +public class BOLE extends AbstractClassifier implements MultiClassClassifier { + + private static final long serialVersionUID = 1L; + + public ClassOption baseLearnerOption = new ClassOption("baseLearner", 'l', + "Classifier to train.", Classifier.class, + "drift.SingleClassifierDrift -l trees.HoeffdingTree -d (DDM -n 7 -w 1.2 -o 1.95)"); + + public IntOption ensembleSizeOption = new IntOption("ensembleSize", 's', + "The number of models to boost.", 10, 1, Integer.MAX_VALUE); + + public FlagOption pureBoostOption = new FlagOption("pureBoost", + 'p', "Boost with weights only; no poisson."); + + public FlagOption breakVotesOption = new FlagOption("breakVotes", + 'b', "Break Votes? 
unchecked=no, checked=yes"); + + public FloatOption errorBoundOption = new FloatOption("errorBound", + 'e', "Error bound percentage for allowing experts to vote.", + 0.5, 0.1, 1.0); + + public FloatOption weightShiftOption = new FloatOption("weightShift", + 'w', "Weight shift associated with the error bound.", + 0.0, 0.0, 5.0); + + private double memberWeight; + private double key_acc; + private int key_position, i, j; + private int maxAcc, minAcc, pos; + private double lambda_d, k; + private boolean correct, okay; + private double em, Bm; + + protected Classifier[] ensemble; + protected int[] orderPosition; + protected double[] scms; + protected double[] swms; + + @Override + public String getPurposeString() { + return "Boosting-like Online Learning Ensemble (BOLE)"; + } + + @Override + public void resetLearningImpl() { + this.ensemble = new Classifier[this.ensembleSizeOption.getValue()]; + this.orderPosition = new int[this.ensemble.length]; + Classifier baseLearner = (Classifier) getPreparedClassOption(this.baseLearnerOption); + baseLearner.resetLearning(); + for (i = 0; i < this.ensemble.length; i++) { + this.ensemble[i] = baseLearner.copy(); + this.orderPosition[i] = i; + } + this.scms = new double[this.ensemble.length]; + this.swms = new double[this.ensemble.length]; + } + + @Override + public void trainOnInstanceImpl(Instance inst) { + // Calculates current accuracy of experts + double[] acc = new double[this.ensemble.length]; + for (i = 0; i < this.ensemble.length; i++) { + acc[i] = this.scms[this.orderPosition[i]] + this.swms[this.orderPosition[i]]; + if (acc[i] != 0.0) { + acc[i] = this.scms[this.orderPosition[i]] / acc[i]; + } + } + + // Sort by accuracy in ascending order + for (i = 1; i < this.ensemble.length; i++) { + key_position = this.orderPosition[i]; + key_acc = acc[i]; + j = i - 1; + while ( (j >=0) && (acc[j] < key_acc) ) { + this.orderPosition[j+1] = this.orderPosition[j]; + acc[j+1] = acc[j]; + j--; + } + this.orderPosition[j+1] = 
key_position; + acc[j+1] = key_acc; + } + + correct = false; + maxAcc = 0; + minAcc = this.ensemble.length - 1; + lambda_d = 1.0; + for (i = 0; i < this.ensemble.length; i++) { + if (correct) { + pos = this.orderPosition[maxAcc]; + maxAcc++; + } else { + pos = this.orderPosition[minAcc]; + minAcc--; + } + + if (this.pureBoostOption.isSet()) + k = lambda_d; + else + k = MiscUtils.poisson(lambda_d, this.classifierRandom); + + if (k > 0.0) { + Instance weightedInst = (Instance) inst.copy(); + weightedInst.setWeight(inst.weight() * k); + this.ensemble[pos].trainOnInstance(weightedInst); + } + + // Increases or decreases lambda based on the prediction of instance + if (this.ensemble[pos].correctlyClassifies(inst)) { + this.scms[pos] += lambda_d; + lambda_d *= (this.trainingWeightSeenByModel / (2 * this.scms[pos])); + correct = true; + } else { + this.swms[pos] += lambda_d; + lambda_d *= (this.trainingWeightSeenByModel / (2 * this.swms[pos])); + correct = false; + } + } + } + + protected double getEnsembleMemberWeight(int i) { + if ( (this.scms[i] > 0.0) && (this.swms[i] > 0.0) ) { + em = this.swms[i] / (this.scms[i] + this.swms[i]); + if (em <= this.errorBoundOption.getValue()) { + Bm = em / (1.0 - em); + okay = true; + return Math.log(1.0 / Bm); + } + + } + okay = false; + return 0.0; + } + + public double[] getVotesForInstance(Instance inst) { + DoubleVector combinedVote = new DoubleVector(); + for (i = 0; i < this.ensemble.length; i++) { + memberWeight = getEnsembleMemberWeight(i) + this.weightShiftOption.getValue(); + if (okay) { + DoubleVector vote = new DoubleVector(this.ensemble[i].getVotesForInstance(inst)); + if (vote.sumOfValues() > 0.0) { + vote.normalize(); + vote.scaleValues(memberWeight); + combinedVote.addValues(vote); + } + } + else if (this.breakVotesOption.isSet()) { + break; + } + } + return combinedVote.getArrayRef(); + } + + public boolean isRandomizable() { + return true; + } + + @Override + public void getModelDescription(StringBuilder out, int 
indent) { + // TODO Auto-generated method stub + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + return new Measurement[]{new Measurement("ensemble size", + this.ensemble != null ? this.ensemble.length : 0)}; + } + + @Override + public Classifier[] getSubClassifiers() { + return this.ensemble.clone(); + } +} diff --git a/moa/src/main/java/moa/classifiers/trees/HoeffdingRegressionTree.java b/moa/src/main/java/moa/classifiers/trees/HoeffdingRegressionTree.java new file mode 100644 index 000000000..786f759eb --- /dev/null +++ b/moa/src/main/java/moa/classifiers/trees/HoeffdingRegressionTree.java @@ -0,0 +1,282 @@ +/* + * HoeffdingRegressionTree.java + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package moa.classifiers.trees; + + +import moa.classifiers.Regressor; + +import com.yahoo.labs.samoa.instances.Instance; +import moa.classifiers.core.AttributeSplitSuggestion; +import moa.classifiers.core.attributeclassobservers.AttributeClassObserver; +import moa.classifiers.core.attributeclassobservers.HoeffdingNominalAttributeClassObserver; +import moa.classifiers.core.splitcriteria.SplitCriterion; +import moa.core.AutoExpandVector; +import moa.classifiers.rules.functions.Perceptron; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + + + +public class HoeffdingRegressionTree extends HoeffdingTree implements Regressor { + + private static final long serialVersionUID = 1L; + + @Override + public String getPurposeString() { + return "Hoeffding Regression Tree or VFDT."; + } + + + + + + public static class InactiveLearningNodeForRegression extends InactiveLearningNode { + public InactiveLearningNodeForRegression(double[] initialClassObservations) { + super(initialClassObservations); + } + + @Override + public void learnFromInstance(Instance inst, HoeffdingTree ht) { + this.observedClassDistribution.addToValue(0, + 1); + this.observedClassDistribution.addToValue(1, + inst.classValue()); + this.observedClassDistribution.addToValue(2, + inst.classValue()* inst.classValue()); + + + } + } + + public static class ActiveLearningNodeForRegression extends ActiveLearningNode { + public ActiveLearningNodeForRegression(double[] initialClassObservations) { + super(initialClassObservations); + this.weightSeenAtLastSplitEvaluation = getWeightSeen(); + this.isInitialized = false; + } + + @Override + public void learnFromInstance(Instance inst, HoeffdingTree ht) { + if (this.isInitialized == false) { + this.attributeObservers = new AutoExpandVector(inst.numAttributes()); + this.isInitialized = true; + } + this.observedClassDistribution.addToValue(0, + 1); + this.observedClassDistribution.addToValue(1, + inst.classValue()); + 
this.observedClassDistribution.addToValue(2, + inst.classValue()* inst.classValue()); + + for (int i = 0; i < inst.numAttributes() - 1; i++) { + int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst); + AttributeClassObserver obs = this.attributeObservers.get(i); + if (obs == null) { + obs = inst.attribute(instAttIndex).isNominal() ? ht.newNominalClassObserver() : ht.newNumericClassObserver(); + this.attributeObservers.set(i, obs); + } + obs.observeAttributeTarget(inst.value(instAttIndex), inst.classValue()); + } + } + + @Override + public double getWeightSeen() { + return this.observedClassDistribution.getValue(0); + } + + + } + + + public static class MeanClass extends ActiveLearningNodeForRegression { + public MeanClass(double[] initialClassObservations) { + super(initialClassObservations); + + + } + + public static double sum(double[] vecteur) { + double result = 0; + for (int i = 0; i < vecteur.length; i++) { + result += vecteur[i]; + } + return result; + } + + @Override + public double[] getClassVotes(Instance inst, HoeffdingTree ht) { + double numberOfExamplesSeen = 0; + double sumOfValues = 0; + double prediction = 0; + double V[] = super.getClassVotes(inst, ht); + sumOfValues = V[1]; + numberOfExamplesSeen = V[0]; + + + prediction = sumOfValues / numberOfExamplesSeen; + + + + return new double[]{prediction}; + } + + } + + @Override + public double[] getVotesForInstance(Instance inst) { + if (this.treeRoot != null) { + FoundNode foundNode = this.treeRoot.filterInstanceToLeaf(inst, + null, -1); + Node leafNode = foundNode.node; + if (leafNode == null) { + leafNode = foundNode.parent; + } + return leafNode.getClassVotes(inst, this); + + } else { + + return new double[]{0}; + } + } + + protected LearningNode newLearningNode(double[] initialClassObservations) { + + return new MeanClass(initialClassObservations); + + } + + + @Override + public void trainOnInstanceImpl(Instance inst) { + if (this.treeRoot == null) { + this.treeRoot = newLearningNode(); + 
this.activeLeafNodeCount = 1; + } + FoundNode foundNode = this.treeRoot.filterInstanceToLeaf(inst, null, -1); + Node leafNode = foundNode.node; + if (leafNode == null) { + leafNode = newLearningNode(); + foundNode.parent.setChild(foundNode.parentBranch, leafNode); + this.activeLeafNodeCount++; + } + if (leafNode instanceof LearningNode) { + LearningNode learningNode = (LearningNode) leafNode; + learningNode.learnFromInstance(inst, this); + if (this.growthAllowed + && (learningNode instanceof ActiveLearningNodeForRegression)) { + ActiveLearningNodeForRegression activeLearningNode = (ActiveLearningNodeForRegression) learningNode; + double weightSeen = activeLearningNode.getWeightSeen(); + if (weightSeen + - activeLearningNode.getWeightSeenAtLastSplitEvaluation() >= this.gracePeriodOption.getValue()) { + attemptToSplit(activeLearningNode, foundNode.parent, + foundNode.parentBranch); + activeLearningNode.setWeightSeenAtLastSplitEvaluation(weightSeen); + } + } + } + + } + + protected void attemptToSplit(ActiveLearningNode node, SplitNode parent, + int parentIndex) { + if (!node.observedClassDistributionIsPure()) { + SplitCriterion splitCriterion = (SplitCriterion) getPreparedClassOption(this.splitCriterionOption); + AttributeSplitSuggestion[] bestSplitSuggestions = node.getBestSplitSuggestions(splitCriterion, this); + Arrays.sort(bestSplitSuggestions); + boolean shouldSplit = false; + if (bestSplitSuggestions.length < 2) { + shouldSplit = bestSplitSuggestions.length > 0; + } else { + double hoeffdingBound = computeHoeffdingBound(splitCriterion.getRangeOfMerit(node.getObservedClassDistribution()), + this.splitConfidenceOption.getValue(), node.getWeightSeen()); + AttributeSplitSuggestion bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; + AttributeSplitSuggestion secondBestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 2]; + if (( secondBestSuggestion.merit/bestSuggestion.merit < 1 - hoeffdingBound) + || (hoeffdingBound < 
this.tieThresholdOption.getValue())) { + shouldSplit = true; + System.out.println(hoeffdingBound poorAtts = new HashSet(); + // scan 1 - add any poor to set + for (int i = 0; i < bestSplitSuggestions.length; i++) { + if (bestSplitSuggestions[i].splitTest != null) { + int[] splitAtts = bestSplitSuggestions[i].splitTest.getAttsTestDependsOn(); + if (splitAtts.length == 1) { + if (bestSuggestion.merit + - bestSplitSuggestions[i].merit > hoeffdingBound) { + poorAtts.add(new Integer(splitAtts[0])); + } + } + } + } + // scan 2 - remove good ones from set + for (int i = 0; i < bestSplitSuggestions.length; i++) { + if (bestSplitSuggestions[i].splitTest != null) { + int[] splitAtts = bestSplitSuggestions[i].splitTest.getAttsTestDependsOn(); + if (splitAtts.length == 1) { + if (bestSuggestion.merit + - bestSplitSuggestions[i].merit < hoeffdingBound) { + poorAtts.remove(new Integer(splitAtts[0])); + } + } + } + } + for (int poorAtt : poorAtts) { + node.disableAttribute(poorAtt); + } + } + } + if (shouldSplit) { + + AttributeSplitSuggestion splitDecision = bestSplitSuggestions[bestSplitSuggestions.length - 1]; + if (splitDecision.splitTest == null) { + // preprune - null wins + deactivateLearningNode(node, parent, parentIndex); + + } else { + + SplitNode newSplit = newSplitNode(splitDecision.splitTest, + node.getObservedClassDistribution(),splitDecision.numSplits() ); + for (int i = 0; i < splitDecision.numSplits(); i++) { + Node newChild = newLearningNode(splitDecision.resultingClassDistributionFromSplit(i)); + newSplit.setChild(i, newChild); + } + this.activeLeafNodeCount--; + this.decisionNodeCount++; + this.activeLeafNodeCount += splitDecision.numSplits(); + if (parent == null) { + this.treeRoot = newSplit; + } else { + parent.setChild(parentIndex, newSplit); + } + } + // manage memory + enforceTrackerLimit(); + } + } + } +} + + diff --git a/moa/src/test/java/moa/classifiers/meta/ADOBTest.java b/moa/src/test/java/moa/classifiers/meta/ADOBTest.java new file mode 100644 
index 000000000..809e450cc --- /dev/null +++ b/moa/src/test/java/moa/classifiers/meta/ADOBTest.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * ADOBTest.java + * Copyright (C) 2014 Santos, Goncalves, Barros + */ +package moa.classifiers.meta; + +import junit.framework.Test; +import junit.framework.TestSuite; +import moa.classifiers.AbstractMultipleClassifierTestCase; +import moa.classifiers.Classifier; +import static moa.test.MoaTestCase.runTest; + +/** + * Tests the ADOB classifier. + * + * @author Silas G. T. C. Santos (sgtcs@cin.ufpe.br) + * @version $Revision$ + */ +public class ADOBTest + extends AbstractMultipleClassifierTestCase { + + /** + * Constructs the test case. Called by subclasses. + * + * @param name the name of the test + */ + public ADOBTest(String name) { + super(name); + this.setNumberTests(1); + } + + /** + * Returns the classifier setups to use in the regression test. + * + * @return the setups + */ + @Override + protected Classifier[] getRegressionClassifierSetups() { + return new Classifier[]{ + new ADOB(), + }; + } + + /** + * Returns a test suite. + * + * @return the test suite + */ + public static Test suite() { + return new TestSuite(ADOBTest.class); + } + + /** + * Runs the test from commandline. 
+ * + * @param args ignored + */ + public static void main(String[] args) { + runTest(suite()); + } +} diff --git a/moa/src/test/java/moa/classifiers/meta/BOLETest.java b/moa/src/test/java/moa/classifiers/meta/BOLETest.java new file mode 100644 index 000000000..0dcef149c --- /dev/null +++ b/moa/src/test/java/moa/classifiers/meta/BOLETest.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * BOLETest.java + * Copyright (C) 2015 Santos, Barros + */ +package moa.classifiers.meta; + +import junit.framework.Test; +import junit.framework.TestSuite; +import moa.classifiers.AbstractMultipleClassifierTestCase; +import moa.classifiers.Classifier; +import static moa.test.MoaTestCase.runTest; + +/** + * Tests the BOLE classifier. + * + * @author Silas G. T. C. Santos (sgtcs@cin.ufpe.br) + * @version $Revision$ + */ +public class BOLETest + extends AbstractMultipleClassifierTestCase { + + /** + * Constructs the test case. Called by subclasses. + * + * @param name the name of the test + */ + public BOLETest(String name) { + super(name); + this.setNumberTests(1); + } + + /** + * Returns the classifier setups to use in the regression test. + * + * @return the setups + */ + @Override + protected Classifier[] getRegressionClassifierSetups() { + return new Classifier[]{ + new BOLE(), + }; + } + + /** + * Returns a test suite. 
+ * + * @return the test suite + */ + public static Test suite() { + return new TestSuite(BOLETest.class); + } + + /** + * Runs the test from commandline. + * + * @param args ignored + */ + public static void main(String[] args) { + runTest(suite()); + } +} diff --git a/moa/src/test/resources/moa/classifiers/meta/ADOB.ref b/moa/src/test/resources/moa/classifiers/meta/ADOB.ref new file mode 100644 index 000000000..cf2116728 --- /dev/null +++ b/moa/src/test/resources/moa/classifiers/meta/ADOB.ref @@ -0,0 +1,355 @@ +--> classification-out0.arff +moa.classifiers.meta.ADOB + +Index + 10000 +Votes + 0: 4.31349713 + 1: 1.77690042 +Measurements + classified instances: 9999 + classifications correct (percent): 78.66786679 + Kappa Statistic (percent): 55.92005442 + Kappa Temporal Statistic (percent): 55.03794266 + Kappa M Statistic (percent): 47.91208791 +Model measurements + model training instances: 9999 + ensemble size: 10 + [avg] model training instances: 9703.4 + [err] model training instances: 622.37358894 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 16.2 + [err] tree size (nodes): 12.96833751 + [avg] tree size (leaves): 12.8 + [err] tree size (leaves): 9.5312352 + [avg] active learning leaves: 12.8 + [err] active learning leaves: 9.5312352 + [avg] tree depth: 2.2 + [err] tree depth: 0.63245553 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 20000 +Votes + 0: 3.30396924 + 1: 3.84022829 +Measurements + classified instances: 19999 + classifications correct (percent): 82.57412871 + Kappa Statistic (percent): 64.1324445 + Kappa Temporal Statistic (percent): 63.61833177 + Kappa M Statistic (percent): 58.22344761 +Model measurements + model training instances: 19999 + 
ensemble size: 10 + [avg] model training instances: 19239.4 + [err] model training instances: 1575.67457233 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 39.5 + [err] tree size (nodes): 14.43952908 + [avg] tree size (leaves): 29.7 + [err] tree size (leaves): 9.25022522 + [avg] active learning leaves: 29.7 + [err] active learning leaves: 9.25022522 + [avg] tree depth: 3 + [err] tree depth: 0.47140452 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 30000 +Votes + 0: 0.30847542 + 1: 7.7688272 +Measurements + classified instances: 29999 + classifications correct (percent): 84.87282909 + Kappa Statistic (percent): 68.83934721 + Kappa Temporal Statistic (percent): 68.6537266 + Kappa M Statistic (percent): 63.66111467 +Model measurements + model training instances: 29999 + ensemble size: 10 + [avg] model training instances: 28550.8 + [err] model training instances: 2307.57228546 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 57.3 + [err] tree size (nodes): 17.4359144 + [avg] tree size (leaves): 42.2 + [err] tree size (leaves): 10.62282658 + [avg] active learning leaves: 42.2 + [err] active learning leaves: 10.62282658 + [avg] tree depth: 3.5 + [err] tree depth: 0.70710678 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 40000 +Votes + 0: 1.74710694 + 1: 6.72830582 +Measurements + classified instances: 39999 + classifications correct (percent): 86.19715493 + Kappa Statistic 
(percent): 71.61069486 + Kappa Temporal Statistic (percent): 71.46917472 + Kappa M Statistic (percent): 66.97966507 +Model measurements + model training instances: 39999 + ensemble size: 10 + [avg] model training instances: 38659.7 + [err] model training instances: 3124.26520372 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 67.2 + [err] tree size (nodes): 27.99126848 + [avg] tree size (leaves): 48.2 + [err] tree size (leaves): 16.8179798 + [avg] active learning leaves: 48.2 + [err] active learning leaves: 16.8179798 + [avg] tree depth: 4 + [err] tree depth: 0.81649658 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 50000 +Votes + 0: 5.25268731 + 1: 3.46270964 +Measurements + classified instances: 49999 + classifications correct (percent): 87.08374167 + Kappa Statistic (percent): 73.42099943 + Kappa Temporal Statistic (percent): 73.26765461 + Kappa M Statistic (percent): 69.09456355 +Model measurements + model training instances: 49999 + ensemble size: 10 + [avg] model training instances: 48501.4 + [err] model training instances: 3955.08326348 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 77 + [err] tree size (nodes): 35.12833614 + [avg] tree size (leaves): 54.6 + [err] tree size (leaves): 20.7910237 + [avg] active learning leaves: 54.6 + [err] active learning leaves: 20.7910237 + [avg] tree depth: 4.2 + [err] tree depth: 0.91893658 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 
+ +Index + 60000 +Votes + 0: 1.18165541 + 1: 6.95943137 +Measurements + classified instances: 59999 + classifications correct (percent): 87.75146252 + Kappa Statistic (percent): 74.8372763 + Kappa Temporal Statistic (percent): 74.67172152 + Kappa M Statistic (percent): 70.8396159 +Model measurements + model training instances: 59999 + ensemble size: 10 + [avg] model training instances: 55328.65 + [err] model training instances: 13960.74927591 + [avg] Change detected: 71.7 + [err] Change detected: 226.73530823 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 83.9 + [err] tree size (nodes): 45.25593884 + [avg] tree size (leaves): 59.1 + [err] tree size (leaves): 28.73035561 + [avg] active learning leaves: 59.1 + [err] active learning leaves: 28.73035561 + [avg] tree depth: 4.3 + [err] tree depth: 1.7669811 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 70000 +Votes + 0: 1.47738616 + 1: 6.87183435 +Measurements + classified instances: 69999 + classifications correct (percent): 88.23840341 + Kappa Statistic (percent): 75.85521504 + Kappa Temporal Statistic (percent): 75.75605878 + Kappa M Statistic (percent): 72.09530911 +Model measurements + model training instances: 69999 + ensemble size: 10 + [avg] model training instances: 64265.35 + [err] model training instances: 16273.63935383 + [avg] Change detected: 393 + [err] Change detected: 1242.77512045 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 98.1 + [err] tree size (nodes): 48.679108 + [avg] tree size (leaves): 68.9 + [err] tree size (leaves): 31.20345422 + [avg] active learning leaves: 68.9 + [err] active learning leaves: 31.20345422 + [avg] tree depth: 4.5 + [err] tree depth: 1.8408935 + [avg] active leaf byte size estimate: 0 + [err] 
active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 80000 +Votes + 0: 2.08648259 + 1: 6.49973266 +Measurements + classified instances: 79999 + classifications correct (percent): 88.69485869 + Kappa Statistic (percent): 76.78380302 + Kappa Temporal Statistic (percent): 76.7201215 + Kappa M Statistic (percent): 73.17594021 +Model measurements + model training instances: 79999 + ensemble size: 10 + [avg] model training instances: 72912.25 + [err] model training instances: 18464.40482851 + [avg] Change detected: 386.7 + [err] Change detected: 1222.85277119 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 115.5 + [err] tree size (nodes): 54.64074589 + [avg] tree size (leaves): 80.3 + [err] tree size (leaves): 35.08101734 + [avg] active learning leaves: 80.3 + [err] active learning leaves: 35.08101734 + [avg] tree depth: 4.7 + [err] tree depth: 1.82878223 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 90000 +Votes + 0: 0.01685327 + 1: 8.77910253 +Measurements + classified instances: 89999 + classifications correct (percent): 89.03210036 + Kappa Statistic (percent): 77.4657281 + Kappa Temporal Statistic (percent): 77.4041433 + Kappa M Statistic (percent): 73.96888186 +Model measurements + model training instances: 89999 + ensemble size: 10 + [avg] model training instances: 81810.85 + [err] model training instances: 20686.54878818 + [avg] Change detected: 386 + [err] Change detected: 1220.63917682 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 128.4 + [err] tree size (nodes): 61.28476338 + [avg] tree size (leaves): 88.7 + [err] tree size 
(leaves): 39.18914249 + [avg] active learning leaves: 88.7 + [err] active learning leaves: 39.18914249 + [avg] tree depth: 4.8 + [err] tree depth: 1.87379591 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 100000 +Votes + 0: 5.56917916 + 1: 3.43255925 +Measurements + classified instances: 99999 + classifications correct (percent): 89.3698937 + Kappa Statistic (percent): 78.16336356 + Kappa Temporal Statistic (percent): 78.10234015 + Kappa M Statistic (percent): 74.79967759 +Model measurements + model training instances: 99999 + ensemble size: 10 + [avg] model training instances: 90333.05 + [err] model training instances: 22766.54915597 + [avg] Change detected: 362.5 + [err] Change detected: 1146.32565181 + [avg] Warning detected: 0 + [err] Warning detected: 0 + [avg] tree size (nodes): 148 + [err] tree size (nodes): 63.70068899 + [avg] tree size (leaves): 102.1 + [err] tree size (leaves): 41.50354737 + [avg] active learning leaves: 102.1 + [err] active learning leaves: 41.50354737 + [avg] tree depth: 4.8 + [err] tree depth: 1.87379591 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + + + diff --git a/moa/src/test/resources/moa/classifiers/meta/BOLE.ref b/moa/src/test/resources/moa/classifiers/meta/BOLE.ref new file mode 100644 index 000000000..7def7ad3e --- /dev/null +++ b/moa/src/test/resources/moa/classifiers/meta/BOLE.ref @@ -0,0 +1,355 @@ +--> classification-out0.arff +moa.classifiers.meta.BOLE + +Index + 10000 +Votes + 0: 3.03848023 + 1: 2.88340964 +Measurements + classified instances: 9999 + classifications correct (percent): 74.94749475 + Kappa 
Statistic (percent): 48.16264129 + Kappa Temporal Statistic (percent): 47.19645868 + Kappa M Statistic (percent): 38.82783883 +Model measurements + model training instances: 9999 + ensemble size: 10 + [avg] model training instances: 8690.4 + [err] model training instances: 2016.58234592 + [avg] Change detected: 6.7 + [err] Change detected: 4.21768762 + [avg] Warning detected: 146.7 + [err] Warning detected: 56.83123163 + [avg] tree size (nodes): 12.4 + [err] tree size (nodes): 7.27552976 + [avg] tree size (leaves): 10 + [err] tree size (leaves): 5.59761854 + [avg] active learning leaves: 10 + [err] active learning leaves: 5.59761854 + [avg] tree depth: 1.8 + [err] tree depth: 0.91893658 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 20000 +Votes + 0: 2.46748285 + 1: 4.19196964 +Measurements + classified instances: 19999 + classifications correct (percent): 79.76898845 + Kappa Statistic (percent): 58.39484147 + Kappa Temporal Statistic (percent): 57.76177054 + Kappa M Statistic (percent): 51.49844162 +Model measurements + model training instances: 19999 + ensemble size: 10 + [avg] model training instances: 18094.65 + [err] model training instances: 3333.4539583 + [avg] Change detected: 0.1 + [err] Change detected: 0.31622777 + [avg] Warning detected: 19.4 + [err] Warning detected: 23.4767024 + [avg] tree size (nodes): 34.6 + [err] tree size (nodes): 18.86325057 + [avg] tree size (leaves): 26.5 + [err] tree size (leaves): 13.61575884 + [avg] active learning leaves: 26.5 + [err] active learning leaves: 13.61575884 + [avg] tree depth: 2.7 + [err] tree depth: 0.67494856 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size 
estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 30000 +Votes + 0: 0.27354188 + 1: 7.39019223 +Measurements + classified instances: 29999 + classifications correct (percent): 82.66275543 + Kappa Statistic (percent): 64.32411446 + Kappa Temporal Statistic (percent): 64.07404849 + Kappa M Statistic (percent): 58.35201794 +Model measurements + model training instances: 29999 + ensemble size: 10 + [avg] model training instances: 27337.25 + [err] model training instances: 4110.95082964 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 7.4 + [err] Warning detected: 9.66321778 + [avg] tree size (nodes): 52.9 + [err] tree size (nodes): 18.91178351 + [avg] tree size (leaves): 39.1 + [err] tree size (leaves): 12.26059632 + [avg] active learning leaves: 39.1 + [err] active learning leaves: 12.26059632 + [avg] tree depth: 3.3 + [err] tree depth: 0.67494856 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 40000 +Votes + 0: 2.21997743 + 1: 5.90869048 +Measurements + classified instances: 39999 + classifications correct (percent): 84.31460787 + Kappa Statistic (percent): 67.79601989 + Kappa Temporal Statistic (percent): 67.57790295 + Kappa M Statistic (percent): 62.47607656 +Model measurements + model training instances: 39999 + ensemble size: 10 + [avg] model training instances: 35792.75 + [err] model training instances: 9376.33981935 + [avg] Change detected: 0.3 + [err] Change detected: 0.9486833 + [avg] Warning detected: 13.2 + [err] Warning detected: 28.78965864 + [avg] tree size (nodes): 60.9 + [err] tree size (nodes): 33.48117216 + [avg] tree size (leaves): 43.7 + [err] tree size (leaves): 22.13117459 + [avg] active learning leaves: 43.7 + [err] active learning leaves: 22.13117459 + [avg] tree depth: 3.7 + [err] 
tree depth: 1.41813649 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 50000 +Votes + 0: 4.28259731 + 1: 4.08632595 +Measurements + classified instances: 49999 + classifications correct (percent): 85.45970919 + Kappa Statistic (percent): 70.14361818 + Kappa Temporal Statistic (percent): 69.90644921 + Kappa M Statistic (percent): 65.20865237 +Model measurements + model training instances: 49999 + ensemble size: 10 + [avg] model training instances: 45527.85 + [err] model training instances: 10048.55585474 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 6.3 + [err] Warning detected: 9.522488 + [avg] tree size (nodes): 71.9 + [err] tree size (nodes): 31.62084404 + [avg] tree size (leaves): 51.3 + [err] tree size (leaves): 19.64716547 + [avg] active learning leaves: 51.3 + [err] active learning leaves: 19.64716547 + [avg] tree depth: 4.3 + [err] tree depth: 0.67494856 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 60000 +Votes + 0: 0.79328783 + 1: 7.76270847 +Measurements + classified instances: 59999 + classifications correct (percent): 86.23310389 + Kappa Statistic (percent): 71.78216485 + Kappa Temporal Statistic (percent): 71.53196622 + Kappa M Statistic (percent): 67.22482343 +Model measurements + model training instances: 59999 + ensemble size: 10 + [avg] model training instances: 55481.65 + [err] model training instances: 10914.72389542 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 6 + [err] Warning detected: 10.20892855 + [avg] tree size (nodes): 86.2 + [err] tree size (nodes): 
36.09801472 + [avg] tree size (leaves): 61 + [err] tree size (leaves): 22.28601953 + [avg] active learning leaves: 61 + [err] active learning leaves: 22.28601953 + [avg] tree depth: 4.5 + [err] tree depth: 0.84983659 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 70000 +Votes + 0: 1.3948847 + 1: 7.34491032 +Measurements + classified instances: 69999 + classifications correct (percent): 86.87838398 + Kappa Statistic (percent): 73.1286334 + Kappa Temporal Statistic (percent): 72.95267823 + Kappa M Statistic (percent): 68.86862798 +Model measurements + model training instances: 69999 + ensemble size: 10 + [avg] model training instances: 65189.15 + [err] model training instances: 11964.04803997 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 3.6 + [err] Warning detected: 6.00370256 + [avg] tree size (nodes): 102.7 + [err] tree size (nodes): 44.44484722 + [avg] tree size (leaves): 72.4 + [err] tree size (leaves): 28.44956551 + [avg] active learning leaves: 72.4 + [err] active learning leaves: 28.44956551 + [avg] tree depth: 4.7 + [err] tree depth: 0.9486833 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 80000 +Votes + 0: 2.87689943 + 1: 6.07641818 +Measurements + classified instances: 79999 + classifications correct (percent): 87.4723434 + Kappa Statistic (percent): 74.34195208 + Kappa Temporal Statistic (percent): 74.20268218 + Kappa M Statistic (percent): 70.27524024 +Model measurements + model training instances: 79999 + ensemble size: 10 + [avg] model training instances: 70595.15 + [err] model training instances: 
19111.19207573 + [avg] Change detected: 0.1 + [err] Change detected: 0.31622777 + [avg] Warning detected: 3.9 + [err] Warning detected: 9.36245457 + [avg] tree size (nodes): 107.2 + [err] tree size (nodes): 58.09532397 + [avg] tree size (leaves): 74.5 + [err] tree size (leaves): 37.64527416 + [avg] active learning leaves: 74.5 + [err] active learning leaves: 37.64527416 + [avg] tree depth: 4.6 + [err] tree depth: 1.17378779 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 90000 +Votes + 0: 0.15679556 + 1: 8.99272524 +Measurements + classified instances: 89999 + classifications correct (percent): 87.89542106 + Kappa Statistic (percent): 75.20133022 + Kappa Temporal Statistic (percent): 75.06237839 + Kappa M Statistic (percent): 71.27109705 +Model measurements + model training instances: 89999 + ensemble size: 10 + [avg] model training instances: 80167.25 + [err] model training instances: 19524.1066751 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 3.5 + [err] Warning detected: 10.72121469 + [avg] tree size (nodes): 123.9 + [err] tree size (nodes): 59.07706831 + [avg] tree size (leaves): 85.7 + [err] tree size (leaves): 38.00307005 + [avg] active learning leaves: 85.7 + [err] active learning leaves: 38.00307005 + [avg] tree depth: 4.9 + [err] tree depth: 0.87559504 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + +Index + 100000 +Votes + 0: 5.79266118 + 1: 3.54275315 +Measurements + classified instances: 99999 + classifications correct (percent): 88.25888259 + Kappa Statistic (percent): 75.95161595 + Kappa Temporal Statistic 
(percent): 75.81369479 + Kappa M Statistic (percent): 72.16585273 +Model measurements + model training instances: 99999 + ensemble size: 10 + [avg] model training instances: 89444.85 + [err] model training instances: 19904.52634129 + [avg] Change detected: 0 + [err] Change detected: 0 + [avg] Warning detected: 5.1 + [err] Warning detected: 8.62103371 + [avg] tree size (nodes): 148 + [err] tree size (nodes): 62.43218543 + [avg] tree size (leaves): 102.7 + [err] tree size (leaves): 40.82496513 + [avg] active learning leaves: 102.7 + [err] active learning leaves: 40.82496513 + [avg] tree depth: 5.1 + [err] tree depth: 1.10050493 + [avg] active leaf byte size estimate: 0 + [err] active leaf byte size estimate: 0 + [avg] inactive leaf byte size estimate: 0 + [err] inactive leaf byte size estimate: 0 + [avg] byte size estimate overhead: 1 + [err] byte size estimate overhead: 0 + + +