diff --git a/pom.xml b/pom.xml index 90d6a5f1..adc07567 100644 --- a/pom.xml +++ b/pom.xml @@ -145,6 +145,7 @@ 3.4.6 1.7.7 + 2.2.4 diff --git a/samoa-api/src/main/java/org/apache/samoa/core/SerializableInstance.java b/samoa-api/src/main/java/org/apache/samoa/core/SerializableInstance.java index 92ef4645..81dcbccc 100644 --- a/samoa-api/src/main/java/org/apache/samoa/core/SerializableInstance.java +++ b/samoa-api/src/main/java/org/apache/samoa/core/SerializableInstance.java @@ -20,8 +20,8 @@ * #L% */ -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; /** * License diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicClassificationPerformanceEvaluator.java index a77831a7..248569fd 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicClassificationPerformanceEvaluator.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicClassificationPerformanceEvaluator.java @@ -1,6 +1,5 @@ package org.apache.samoa.evaluation; -import java.util.Arrays; import java.util.List; import org.apache.samoa.instances.Attribute; @@ -25,7 +24,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.instances.Utils; import org.apache.samoa.moa.AbstractMOAObject; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicRegressionPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicRegressionPerformanceEvaluator.java index ab169049..49ec4a95 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicRegressionPerformanceEvaluator.java +++ 
b/samoa-api/src/main/java/org/apache/samoa/evaluation/BasicRegressionPerformanceEvaluator.java @@ -1,9 +1,5 @@ package org.apache.samoa.evaluation; -import java.util.List; - -import org.apache.samoa.instances.Attribute; - /* * #%L * SAMOA @@ -24,9 +20,8 @@ * #L% */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Utils; -import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.instances.instances.Instance; + import org.apache.samoa.moa.AbstractMOAObject; import org.apache.samoa.moa.core.Measurement; import org.apache.samoa.moa.core.Vote; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/ClusteringEvaluationContentEvent.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/ClusteringEvaluationContentEvent.java index 67bdeec1..31a4af1f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/ClusteringEvaluationContentEvent.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/ClusteringEvaluationContentEvent.java @@ -20,7 +20,6 @@ * #L% */ import org.apache.samoa.core.*; -import org.apache.samoa.instances.Instance; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.core.DataPoint; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/EvaluationDistributorProcessor.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/EvaluationDistributorProcessor.java index a243a103..bd6e2217 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/EvaluationDistributorProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/EvaluationDistributorProcessor.java @@ -27,7 +27,7 @@ import com.google.common.base.Preconditions; import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.moa.core.MiscUtils; import 
org.apache.samoa.topology.Stream; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java index d54296d4..d6e2b60f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java @@ -23,7 +23,7 @@ */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.instances.Utils; import org.apache.samoa.moa.AbstractMOAObject; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/PerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/PerformanceEvaluator.java index c4c4a0b1..bbf9752e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/PerformanceEvaluator.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/PerformanceEvaluator.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.MOAObject; import org.apache.samoa.moa.core.Measurement; import org.apache.samoa.moa.core.Vote; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/WindowClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/WindowClassificationPerformanceEvaluator.java index 6ea40ed6..2d028fa9 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/WindowClassificationPerformanceEvaluator.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/WindowClassificationPerformanceEvaluator.java @@ -24,7 +24,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.instances.Utils; import 
org.apache.samoa.moa.AbstractMOAObject; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM_GTAnalysis.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM_GTAnalysis.java index a48c0549..9db8ee1f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM_GTAnalysis.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM_GTAnalysis.java @@ -24,7 +24,7 @@ import java.util.HashMap; import java.util.Iterator; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.core.AutoExpandVector; import org.apache.samoa.moa.core.DataPoint; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/General.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/General.java index 1a9ca1d5..308a3c02 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/General.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/General.java @@ -22,7 +22,7 @@ import java.util.ArrayList; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.cluster.SphereCluster; import org.apache.samoa.moa.core.DataPoint; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/SSQ.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/SSQ.java index 8ee6a43c..44d5ee2e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/SSQ.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/SSQ.java @@ -21,7 +21,7 @@ */ import java.util.ArrayList; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Clustering; import 
org.apache.samoa.moa.core.DataPoint; import org.apache.samoa.moa.evaluation.MeasureCollection; diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/Separation.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/Separation.java index 0d8710a5..8e3a7f74 100644 --- a/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/Separation.java +++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/measures/Separation.java @@ -22,8 +22,8 @@ import java.util.ArrayList; import java.util.List; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Cluster; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.cluster.SphereCluster; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContent.java b/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContent.java index c4e6b841..6c02cca2 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContent.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContent.java @@ -27,7 +27,7 @@ import net.jcip.annotations.Immutable; import org.apache.samoa.core.SerializableInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import java.io.Serializable; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContentEvent.java b/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContentEvent.java index c1a1de60..867ec0a3 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContentEvent.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/InstanceContentEvent.java @@ -25,8 +25,7 @@ */ import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.SerializableInstance; -import org.apache.samoa.instances.Instance; +import 
org.apache.samoa.instances.instances.Instance; import net.jcip.annotations.Immutable; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/Learner.java b/samoa-api/src/main/java/org/apache/samoa/learners/Learner.java index a19943e7..248cb577 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/Learner.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/Learner.java @@ -24,7 +24,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.topology.Stream; import org.apache.samoa.topology.TopologyBuilder; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/ResultContentEvent.java b/samoa-api/src/main/java/org/apache/samoa/learners/ResultContentEvent.java index 3ede55c7..3b700f8a 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/ResultContentEvent.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/ResultContentEvent.java @@ -22,7 +22,7 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.SerializableInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; /** * License diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearner.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearner.java index 6d6a6641..34aec71b 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearner.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearner.java @@ -21,10 +21,9 @@ */ import java.io.Serializable; -import java.util.Map; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; /** * Learner interface for non-distributed learners. 
diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearnerProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearnerProcessor.java index 5e2c927e..ba302e62 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearnerProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/LocalLearnerProcessor.java @@ -26,7 +26,7 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.ResultContentEvent; import org.apache.samoa.moa.classifiers.core.driftdetection.ChangeDetector; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/NaiveBayes.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/NaiveBayes.java index df24cd5e..0a737217 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/NaiveBayes.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/NaiveBayes.java @@ -23,8 +23,8 @@ import java.util.HashMap; import java.util.Map; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.moa.classifiers.core.attributeclassobservers.GaussianNumericAttributeClassObserver; import org.apache.samoa.moa.core.GaussianEstimator; import org.slf4j.Logger; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SimpleClassifierAdapter.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SimpleClassifierAdapter.java index 8db84821..655e173e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SimpleClassifierAdapter.java +++ 
b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SimpleClassifierAdapter.java @@ -22,9 +22,9 @@ /** * License */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.classifiers.functions.MajorityClass; import com.github.javacliparser.ClassOption; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java index b2a09da3..965e90fc 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java @@ -29,10 +29,9 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.AdaptiveLearner; import org.apache.samoa.learners.ClassificationLearner; -import org.apache.samoa.learners.Learner; import org.apache.samoa.moa.classifiers.core.driftdetection.ChangeDetector; import org.apache.samoa.topology.Stream; import org.apache.samoa.topology.TopologyBuilder; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java index 66808629..ca3d3248 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java @@ -29,10 +29,9 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import 
org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.AdaptiveLearner; import org.apache.samoa.learners.ClassificationLearner; -import org.apache.samoa.learners.Learner; import org.apache.samoa.learners.classifiers.trees.VerticalHoeffdingTree; import org.apache.samoa.moa.classifiers.core.driftdetection.ADWINChangeDetector; import org.apache.samoa.moa.classifiers.core.driftdetection.ChangeDetector; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java index 967684ff..bc566e04 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java @@ -27,7 +27,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.ClassificationLearner; import org.apache.samoa.learners.Learner; import org.apache.samoa.learners.classifiers.trees.VerticalHoeffdingTree; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BaggingDistributorProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BaggingDistributorProcessor.java index a49065b0..d757ca61 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BaggingDistributorProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BaggingDistributorProcessor.java @@ -29,7 +29,7 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.moa.core.MiscUtils; 
import org.apache.samoa.topology.Stream; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java index 65120281..6c8c90bc 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java @@ -29,7 +29,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.ClassificationLearner; import org.apache.samoa.learners.Learner; import org.apache.samoa.learners.classifiers.SingleClassifier; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BoostingPredictionCombinerProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BoostingPredictionCombinerProcessor.java index 6cfcfae0..227a8345 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BoostingPredictionCombinerProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BoostingPredictionCombinerProcessor.java @@ -28,7 +28,7 @@ import java.util.Random; import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.ResultContentEvent; import org.apache.samoa.moa.core.DoubleVector; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Sharding.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Sharding.java index 588d9f2d..95ce2171 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Sharding.java +++ 
b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Sharding.java @@ -23,7 +23,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.Learner; import org.apache.samoa.learners.classifiers.trees.VerticalHoeffdingTree; import org.apache.samoa.topology.Stream; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/ShardingDistributorProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/ShardingDistributorProcessor.java index 0e936d76..bf80cc5b 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/ShardingDistributorProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/ShardingDistributorProcessor.java @@ -29,7 +29,7 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.topology.Stream; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/AMRulesRegressor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/AMRulesRegressor.java index 58d3eb6b..5ca04904 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/AMRulesRegressor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/AMRulesRegressor.java @@ -25,7 +25,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.RegressionLearner; import org.apache.samoa.learners.classifiers.rules.centralized.AMRulesRegressorProcessor; import 
org.apache.samoa.moa.classifiers.rules.core.attributeclassobservers.FIMTDDNumericAttributeClassLimitObserver; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/HorizontalAMRulesRegressor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/HorizontalAMRulesRegressor.java index 822c2be6..790bba03 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/HorizontalAMRulesRegressor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/HorizontalAMRulesRegressor.java @@ -25,7 +25,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.RegressionLearner; import org.apache.samoa.learners.classifiers.rules.distributed.AMRDefaultRuleProcessor; import org.apache.samoa.learners.classifiers.rules.distributed.AMRLearnerProcessor; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/VerticalAMRulesRegressor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/VerticalAMRulesRegressor.java index 2fb5c2d4..a2414f3d 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/VerticalAMRulesRegressor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/VerticalAMRulesRegressor.java @@ -22,7 +22,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.RegressionLearner; import org.apache.samoa.learners.classifiers.rules.distributed.AMRulesAggregatorProcessor; import org.apache.samoa.learners.classifiers.rules.distributed.AMRulesStatisticsProcessor; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/centralized/AMRulesRegressorProcessor.java 
b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/centralized/AMRulesRegressorProcessor.java index 55f00644..18b07aa2 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/centralized/AMRulesRegressorProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/centralized/AMRulesRegressorProcessor.java @@ -26,8 +26,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.ResultContentEvent; import org.apache.samoa.learners.classifiers.rules.common.ActiveRule; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java index 5c41215a..3b40809d 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.core.DoubleVector; import org.apache.samoa.moa.core.StringUtils; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java index 1b6c9d24..e86e0aba 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java @@ -22,7 +22,7 @@ import java.io.Serializable; -import 
org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.classifiers.AbstractClassifier; import org.apache.samoa.moa.classifiers.Regressor; import org.apache.samoa.moa.core.DoubleVector; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Rule.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Rule.java index 971f00fe..80e9c315 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Rule.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Rule.java @@ -23,7 +23,7 @@ import java.util.LinkedList; import java.util.List; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.AbstractMOAObject; import org.apache.samoa.moa.classifiers.rules.core.conditionaltests.NumericAttributeBinaryRulePredicate; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveRegressionNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveRegressionNode.java index 11df518b..e3f427f6 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveRegressionNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveRegressionNode.java @@ -24,7 +24,7 @@ import java.util.LinkedList; import java.util.List; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; import org.apache.samoa.moa.classifiers.core.attributeclassobservers.AttributeClassObserver; import org.apache.samoa.moa.classifiers.core.attributeclassobservers.FIMTDDNumericAttributeClassObserver; diff --git 
a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveRegressionNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveRegressionNode.java index 25d634e2..7c1f3a11 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveRegressionNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveRegressionNode.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.core.DoubleVector; /** diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java index 512ebab0..01db1523 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java @@ -22,7 +22,7 @@ import java.io.Serializable; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.core.DoubleVector; /** diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java index afb5b4ec..5faaf7ac 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.learners.classifiers.trees.SplitNode; import 
org.apache.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest; import org.apache.samoa.moa.classifiers.rules.core.Predicate; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/TargetMean.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/TargetMean.java index 1d80a029..38f0bb98 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/TargetMean.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/TargetMean.java @@ -46,7 +46,7 @@ * * */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.classifiers.AbstractClassifier; import org.apache.samoa.moa.classifiers.Regressor; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRDefaultRuleProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRDefaultRuleProcessor.java index 9f2b9c20..f0c9860e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRDefaultRuleProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRDefaultRuleProcessor.java @@ -22,8 +22,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.ResultContentEvent; import org.apache.samoa.learners.classifiers.rules.common.ActiveRule; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRLearnerProcessor.java 
b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRLearnerProcessor.java index a7189452..166758d2 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRLearnerProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRLearnerProcessor.java @@ -26,8 +26,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.classifiers.rules.common.ActiveRule; import org.apache.samoa.learners.classifiers.rules.common.LearningRule; import org.apache.samoa.learners.classifiers.rules.common.RuleActiveRegressionNode; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRRuleSetProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRRuleSetProcessor.java index beb7e405..ff3e9826 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRRuleSetProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRRuleSetProcessor.java @@ -24,8 +24,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.ResultContentEvent; import org.apache.samoa.learners.classifiers.rules.common.ActiveRule; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesAggregatorProcessor.java 
b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesAggregatorProcessor.java index 2131db49..035a7def 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesAggregatorProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesAggregatorProcessor.java @@ -26,8 +26,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.ResultContentEvent; import org.apache.samoa.learners.classifiers.rules.common.ActiveRule; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesStatisticsProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesStatisticsProcessor.java index 86fae3c6..d105f01e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesStatisticsProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AMRulesStatisticsProcessor.java @@ -26,8 +26,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.classifiers.rules.common.ActiveRule; import org.apache.samoa.learners.classifiers.rules.common.RuleActiveRegressionNode; import org.apache.samoa.learners.classifiers.rules.common.RulePassiveRegressionNode; diff --git 
a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AssignmentContentEvent.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AssignmentContentEvent.java index 0c603f89..0c3a2f33 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AssignmentContentEvent.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/distributed/AssignmentContentEvent.java @@ -21,7 +21,7 @@ */ import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; /** * Forwarded instances from Model Agrregator to Learners/Default Rule Learner. diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ActiveLearningNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ActiveLearningNode.java index a4377196..4ab751a7 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ActiveLearningNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ActiveLearningNode.java @@ -23,7 +23,7 @@ import java.util.HashMap; import java.util.Map; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/FilterProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/FilterProcessor.java index 2c81fd06..21ceaf25 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/FilterProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/FilterProcessor.java @@ -22,9 +22,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import 
org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.learners.InstancesContentEvent; import org.apache.samoa.learners.ResultContentEvent; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/InactiveLearningNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/InactiveLearningNode.java index e4df5778..0ccbfe11 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/InactiveLearningNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/InactiveLearningNode.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; /** * Class that represents inactive learning node. 
Inactive learning node is a node which only keeps track of the observed diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/LearningNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/LearningNode.java index 9b0480ce..915dca95 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/LearningNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/LearningNode.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; /** * Abstract class that represents a learning node diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ModelAggregatorProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ModelAggregatorProcessor.java index 967682a5..9950b79e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ModelAggregatorProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/ModelAggregatorProcessor.java @@ -38,9 +38,9 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.learners.InstanceContent; import org.apache.samoa.learners.InstancesContentEvent; import org.apache.samoa.learners.ResultContentEvent; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/Node.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/Node.java index 898a4339..b8ee7b86 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/Node.java +++ 
b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/Node.java @@ -21,7 +21,7 @@ */ import org.apache.samoa.core.DoubleVector; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; /** * Abstract class that represents a node in the tree model. diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/SplitNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/SplitNode.java index c2b1a476..e0ff9b6c 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/SplitNode.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/SplitNode.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest; import org.apache.samoa.moa.core.AutoExpandVector; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/VerticalHoeffdingTree.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/VerticalHoeffdingTree.java index 6534ceed..ea754c2d 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/VerticalHoeffdingTree.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/trees/VerticalHoeffdingTree.java @@ -23,7 +23,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.AdaptiveLearner; import org.apache.samoa.learners.ClassificationLearner; import org.apache.samoa.moa.classifiers.core.attributeclassobservers.AttributeClassObserver; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClusteringContentEvent.java b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClusteringContentEvent.java index 
e7eb5b55..86de0cc2 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClusteringContentEvent.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClusteringContentEvent.java @@ -20,7 +20,7 @@ * #L% */ import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import net.jcip.annotations.Immutable; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClustreamClustererAdapter.java b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClustreamClustererAdapter.java index e0c1cb3d..ee6915af 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClustreamClustererAdapter.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/ClustreamClustererAdapter.java @@ -22,9 +22,9 @@ /** * License */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.clusterers.clustream.Clustream; @@ -82,10 +82,7 @@ public ClustreamClustererAdapter(org.apache.samoa.moa.clusterers.Clusterer learn /** * Instantiates a new learner. 
* - * @param learner - * the learner - * @param dataset - * the dataset + * */ public ClustreamClustererAdapter() { this.learner = ((org.apache.samoa.moa.clusterers.Clusterer) this.learnerOption.getValue()).copy(); diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererAdapter.java b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererAdapter.java index 4a3e5e9b..411d688a 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererAdapter.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererAdapter.java @@ -22,8 +22,8 @@ import java.io.Serializable; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.moa.cluster.Clustering; /** diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererProcessor.java b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererProcessor.java index 163184ff..7fd9895a 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/LocalClustererProcessor.java @@ -24,10 +24,9 @@ */ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.evaluation.ClusteringEvaluationContentEvent; import org.apache.samoa.evaluation.ClusteringResultContentEvent; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.core.DataPoint; import org.apache.samoa.topology.Stream; diff --git 
a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/SingleLearner.java b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/SingleLearner.java index f6173f67..bfb472c3 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/SingleLearner.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/SingleLearner.java @@ -29,7 +29,7 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.Learner; import org.apache.samoa.topology.Stream; import org.apache.samoa.topology.TopologyBuilder; diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/simple/DistributedClusterer.java b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/simple/DistributedClusterer.java index 8f3537a5..aa0fe36d 100644 --- a/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/simple/DistributedClusterer.java +++ b/samoa-api/src/main/java/org/apache/samoa/learners/clusterers/simple/DistributedClusterer.java @@ -29,10 +29,9 @@ import java.util.Set; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.Learner; import org.apache.samoa.learners.clusterers.*; -import org.apache.samoa.topology.ProcessingItem; import org.apache.samoa.topology.Stream; import org.apache.samoa.topology.TopologyBuilder; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/AbstractClassifier.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/AbstractClassifier.java index 4ca4c0f7..e0789fdf 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/AbstractClassifier.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/AbstractClassifier.java @@ -25,8 +25,8 @@ import java.util.List; import java.util.Random; -import 
org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.MOAObject; import org.apache.samoa.moa.core.Example; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/Classifier.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/Classifier.java index cd408885..171b46e3 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/Classifier.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/Classifier.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.core.Example; import org.apache.samoa.moa.learners.Learner; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java index 1d228e08..d3331933 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java @@ -20,8 +20,8 @@ * #L% */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.AbstractMOAObject; /** diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java index 1190aaa2..6abfcffd 100644 --- 
a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java @@ -20,8 +20,8 @@ * #L% */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; /** * Nominal binary conditional test for instances to use to split nodes in Hoeffding trees. diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java index f4816a1b..d074aaf7 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java @@ -20,8 +20,8 @@ * #L% */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; /** * Nominal multi way conditional test for instances to use to split nodes in Hoeffding trees. 
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java index 3552869c..e8dff3d5 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java @@ -20,8 +20,8 @@ * #L% */ -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; /** * Numeric binary conditional test for instances to use to split nodes in Hoeffding trees. diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java index 8474d88d..4ebff581 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.classifiers.AbstractClassifier; import org.apache.samoa.moa.core.DoubleVector; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java index a744897c..7f479730 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import 
org.apache.samoa.instances.instances.Instance; /** * Interface for a predicate (a feature) in rules. diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java index a2b04bd5..3e0ee51e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java @@ -39,8 +39,8 @@ */ package org.apache.samoa.moa.classifiers.rules.core.conditionaltests; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalBinaryTest; import org.apache.samoa.moa.classifiers.rules.core.Predicate; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java index bffb7763..68992cda 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java @@ -21,7 +21,7 @@ */ import java.util.Arrays; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; /* micro cluster, as defined by Aggarwal et al, On Clustering Massive Data Streams: A Summarization Praradigm * in the book Data streams : models and algorithms, by Charu C Aggarwal diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java index a700641a..493f6cd5 100644 --- 
a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java @@ -25,7 +25,7 @@ import java.util.Map; import java.util.Random; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.AbstractMOAObject; public abstract class Cluster extends AbstractMOAObject { diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java index 7c2a9c8a..040372ac 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java @@ -26,7 +26,7 @@ import java.util.List; import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.AbstractMOAObject; import org.apache.samoa.moa.core.AutoExpandVector; import org.apache.samoa.moa.core.DataPoint; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java index fdf2af62..a87ac1c4 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java @@ -23,8 +23,8 @@ import java.util.List; import java.util.Random; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; /** * A simple implementation of the Cluster interface representing spherical clusters. 
The inclusion diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java index 02a5191e..b73046bc 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java @@ -24,9 +24,9 @@ import java.util.List; import java.util.Random; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.core.Measurement; import org.apache.samoa.moa.core.ObjectRepository; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java index b9f80b53..1d85a820 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java @@ -24,7 +24,7 @@ import java.util.Arrays; import java.util.Random; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.cluster.SphereCluster; import org.apache.samoa.moa.core.DataPoint; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java index bfa07c86..da3fe5ab 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java @@ -20,8 +20,8 @@ * #L% */ -import 
org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.MOAObject; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.core.Measurement; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java index 055c3d6a..ff9a207d 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java @@ -25,8 +25,8 @@ import java.util.List; import java.util.Random; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.Cluster; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.cluster.SphereCluster; diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java index 522d50ec..eb0aa076 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java @@ -21,7 +21,7 @@ */ import java.util.List; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.CFCluster; public class ClustreamKernel extends CFCluster { diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java index cf54dd8d..f027cff2 100644 
--- a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java @@ -26,8 +26,8 @@ import java.util.List; import java.util.Random; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.cluster.CFCluster; import org.apache.samoa.moa.cluster.Cluster; import org.apache.samoa.moa.cluster.Clustering; @@ -217,7 +217,8 @@ private static double distance(double[] pointA, double[] pointB) { * k-means of (micro)clusters, with ground-truth-aided initialization. (to produce best results) * * @param k - * @param data + * @param clustering + * @param gtClustering * @return (macro)clustering - CFClusters */ public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) { @@ -256,7 +257,7 @@ public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtC * k-means of (micro)clusters, with randomized initialization. 
* * @param k - * @param data + * @param clustering * @return (macro)clustering - CFClusters */ public static Clustering kMeans_rand(int k, Clustering clustering) { diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/DataPoint.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/DataPoint.java index e4e1c617..690b62ae 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/core/DataPoint.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/DataPoint.java @@ -25,8 +25,8 @@ import java.util.TreeSet; import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; public class DataPoint extends DenseInstance { diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/InstanceExample.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/InstanceExample.java index 3ae89855..f34c002c 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/core/InstanceExample.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/InstanceExample.java @@ -22,7 +22,7 @@ import java.io.Serializable; -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; public class InstanceExample implements Example, Serializable { diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/Vote.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/Vote.java index 24ea3f31..9b02f847 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/core/Vote.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/Vote.java @@ -1,5 +1,26 @@ package org.apache.samoa.moa.core; +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + import java.io.Serializable; /* diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java b/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java index 8905ea5b..63112595 100644 --- a/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java +++ b/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.MOAObject; import org.apache.samoa.moa.core.Example; import org.apache.samoa.moa.core.Measurement; @@ -74,7 +74,7 @@ public interface Learner extends MOAObject, OptionHandler { /** * Trains this learner incrementally using the given example. * - * @param inst + * @param example * the instance to be used for training */ public void trainOnInstance(E example); @@ -83,7 +83,7 @@ public interface Learner extends MOAObject, OptionHandler { * Predicts the class memberships for a given instance. If an instance is unclassified, the returned array elements * must be all zero. 
* - * @param inst + * @param example * the instance to be classified * @return an array containing the estimated membership probabilities of the test instance in each class */ diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java index 9ee831eb..2acbe3d5 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java @@ -24,13 +24,16 @@ import java.io.IOException; import java.io.InputStreamReader; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.loaders.ArffLoader; +import org.apache.samoa.instances.loaders.AvroBinaryLoader; +import org.apache.samoa.instances.loaders.LoaderFactory; +import org.apache.samoa.instances.loaders.LoaderType; import org.apache.samoa.moa.core.InstanceExample; import org.apache.samoa.moa.core.ObjectRepository; import org.apache.samoa.moa.tasks.TaskMonitor; import com.github.javacliparser.FileOption; -import com.github.javacliparser.IntOption; /** * InstanceStream for ARFF file @@ -84,7 +87,13 @@ protected boolean getNextFileStream() { return false; this.fileReader = new BufferedReader(new InputStreamReader(this.inputStream)); - this.instances = new Instances(this.fileReader, 1, -1); + + LoaderFactory loaderFactory = new LoaderFactory(); + ArffLoader arffLoader = (ArffLoader) loaderFactory.createLoader(LoaderType.ARFF_LOADER, classIndexOption.getValue()); + arffLoader.setupStreamTokenizer(this.fileReader, null); + this.instances = new Instances(arffLoader); + + //this.instances = new Instances(this.fileReader, 1, -1); if (this.classIndexOption.getValue() < 0) { this.instances.setClassIndex(this.instances.numAttributes() - 1); } else if (this.classIndexOption.getValue() > 0) { @@ -97,7 +106,7 @@ protected boolean getNextFileStream() { @Override protected boolean 
readNextInstanceFromFile() { try { - if (this.instances.readInstance(this.fileReader)) { + if (this.instances.readInstance()) { this.lastInstanceRead = new InstanceExample(this.instances.instance(0)); this.instances.delete(); // keep instances clean return true; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java index 7c575d05..b48112bf 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java @@ -21,9 +21,11 @@ */ import java.io.IOException; -import java.io.InputStream; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.loaders.*; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.loaders.LoaderFactory; +import org.apache.samoa.instances.loaders.LoaderType; import org.apache.samoa.moa.core.InstanceExample; import org.apache.samoa.moa.core.ObjectRepository; import org.apache.samoa.moa.tasks.TaskMonitor; @@ -31,7 +33,6 @@ import org.slf4j.LoggerFactory; import com.github.javacliparser.FileOption; -import com.github.javacliparser.IntOption; import com.github.javacliparser.StringOption; /** @@ -53,6 +54,10 @@ public class AvroFileStream extends FileStream { /** Represents the last read Instance **/ protected InstanceExample lastInstanceRead; + protected static enum AVRO_ENCODING_FORMAT { + JSON, BINARY + } + /** Represents the binary input stream of avro data **/ //protected transient InputStream inputStream = null; @@ -101,7 +106,16 @@ protected boolean getNextFileStream() { if (inputStream == null) return false; - this.instances = new Instances(this.inputStream, classIndexOption.getValue(), encodingFormatOption.getValue()); + LoaderFactory loaderFactory = new LoaderFactory(); + AvroLoader avroLoader; + if( encodingFormatOption.getValue().equals(AVRO_ENCODING_FORMAT.BINARY.toString())){ + avroLoader = 
(AvroLoader) loaderFactory.createLoader(LoaderType.AVRO_BINARY_LOADER, classIndexOption.getValue()); + }else{ + avroLoader = (AvroLoader) loaderFactory.createLoader(LoaderType.AVRO_JSON_LOADER, classIndexOption.getValue()); + } + avroLoader.initializeSchema(inputStream); + this.instances = new Instances(avroLoader); + if (this.classIndexOption.getValue() < 0) { this.instances.setClassIndex(this.instances.numAttributes() - 1); diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/ClusteringEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/ClusteringEntranceProcessor.java index 3c0d76e8..6527587c 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/ClusteringEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/ClusteringEntranceProcessor.java @@ -26,8 +26,8 @@ import org.apache.samoa.core.EntranceProcessor; import org.apache.samoa.core.Processor; import org.apache.samoa.evaluation.ClusteringEvaluationContentEvent; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.clusterers.ClusteringContentEvent; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.core.DataPoint; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/ExampleStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/ExampleStream.java index b0cbe11b..c1a8e07f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/ExampleStream.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/ExampleStream.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.MOAObject; import org.apache.samoa.moa.core.Example; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java 
b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java index 05dcb1a0..bf705186 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java @@ -24,8 +24,8 @@ import com.github.javacliparser.FloatOption; import com.github.javacliparser.IntOption; import com.github.javacliparser.ListOption; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.core.InstanceExample; import org.apache.samoa.moa.core.ObjectRepository; import org.apache.samoa.moa.options.AbstractOptionHandler; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/InstanceStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/InstanceStream.java index bf9e7afb..2ac96eeb 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/InstanceStream.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/InstanceStream.java @@ -20,7 +20,7 @@ * #L% */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.core.Example; /** diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/KafkaStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/KafkaStream.java new file mode 100644 index 00000000..c0a4390e --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/KafkaStream.java @@ -0,0 +1,191 @@ +package org.apache.samoa.streams; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.IntOption; +import com.github.javacliparser.StringOption; +import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.kafka.common.errors.InvalidTopicException; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; +import org.apache.samoa.instances.kafka.KafkaAvroMapper; +import org.apache.samoa.instances.kafka.KafkaConsumerThread; +import org.apache.samoa.instances.kafka.KafkaDeserializer; +import org.apache.samoa.instances.loaders.AvroLoader; +import org.apache.samoa.instances.loaders.KafkaLoader; +import org.apache.samoa.instances.loaders.LoaderFactory; +import org.apache.samoa.instances.loaders.LoaderType; +import org.apache.samoa.learners.InstanceContentEvent; +import org.apache.samoa.moa.core.Example; +import org.apache.samoa.moa.core.InstanceExample; +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; +import org.apache.samoa.streams.kafka.KafkaEntranceProcessor; +import org.apache.samoa.instances.kafka.KafkaJsonMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Properties; + +public class KafkaStream extends AbstractOptionHandler implements InstanceStream { + + private static final Logger logger = LoggerFactory.getLogger(KafkaStream.class); + private static final 
char TOPIC_SEPARATOR = '-'; + + public StringOption hostOption = new StringOption("host", 'h', "Kafka host address", "127.0.0.1"); + public StringOption portOption = new StringOption("port", 'p', "Kafka port address", "9092"); + public StringOption topicOption = new StringOption("topic", 't', "Kafka topic name", "samoa_ff"); // NOTE(review): this default has no "-json"/"-avro" suffix, so getDeserializer() rejects it with InvalidTopicException + public IntOption timeoutOption = new IntOption("timeout", 'e', "Kafka timeout", 1000, 0, Integer.MAX_VALUE); + + /** + * Serialization version identifier. + */ + private static final long serialVersionUID = -4387950661589472853L; + + protected Instances instances; + protected InstanceExample lastInstanceRead; + + protected boolean hitEndOfStream; + + private KafkaLoader kafkaLoader; + + @Override + public InstancesHeader getHeader() { + return new InstancesHeader(this.instances); + } + + @Override + public long estimatedRemainingInstances() { + return -1; + } + + @Override + public boolean hasMoreInstances() { + + return true; + } + + protected boolean readNextInstance() { + logger.debug("Reading next instance"); + + this.instances = getDataset(); + if (this.instances != null) { + this.lastInstanceRead = new InstanceExample(this.instances.instance(0)); + this.instances.delete(); // keep instances clean + logger.debug("Reading next instance successful"); + return true; + } + logger.debug("Reading next instance unsuccessful"); + return false; + + } + + private KafkaDeserializer getDeserializer(String value) { + try { + String topicExtension = value.substring(value.lastIndexOf(TOPIC_SEPARATOR) + 1); + if(topicExtension.isEmpty()) + throw new InvalidTopicException("Invalid topic provided.
Topic should contain an extension indicating data format, like topic-json or topic-avro."); + switch (topicExtension){ + case "json": + return new KafkaJsonMapper(Charset.forName("UTF-8")); + case "avro": + return new KafkaAvroMapper(); + default: + throw new InvalidTopicException("Unsupported data serialization provided"); + } + } catch(IndexOutOfBoundsException e){ + logger.error("Error parsing topic",e); + throw new InvalidTopicException("Invalid topic provided. Topic should contain an extension indicating data format, like topic-json or topic-avro."); + } + + } + + @Override + public Example nextInstance() { + if (this.lastInstanceRead == null) { + this.readNextInstance(); + } + Example ret = this.lastInstanceRead; + this.lastInstanceRead = null; + return ret; + } + + @Override + public boolean isRestartable() { + return true; + } + + @Override + public void restart() { + // NOTE(review): no-op although isRestartable() returns true; nothing is reset here + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // Not implemented: nothing is appended to sb + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + logger.info("Initializing KafkaLoader with host: " + this.hostOption.getValue() + ", port: " + this.portOption.getValue() + ", topic: " + this.topicOption.getValue()); + LoaderFactory loaderFactory = new LoaderFactory(); + // Create loader + kafkaLoader = (KafkaLoader) loaderFactory.createLoader(LoaderType.KAFKA_LOADER, -1); + + // Create and set deserializer + KafkaDeserializer kafkaDeserializer = getDeserializer(topicOption.getValue()); + kafkaLoader.setDeserializer(kafkaDeserializer); + + // Create, configure and start data fetching thread + kafkaLoader.setKafkaConsumerThread(new KafkaConsumerThread(getConsumerProperties(hostOption.getValue(), portOption.getValue()), Arrays.asList(topicOption.getValue()), timeoutOption.getValue())); + kafkaLoader.runKafkaConsumerThread(); + } + + protected Properties getConsumerProperties(String
BROKERHOST, String BROKERPORT) { + Properties consumerProps = new Properties(); + consumerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); + consumerProps.put("enable.auto.commit", "true"); + consumerProps.put("auto.commit.interval.ms", "1000"); + consumerProps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); + consumerProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); + consumerProps.setProperty("group.id", "test"); + consumerProps.setProperty("auto.offset.reset", "earliest"); + return consumerProps; + } + + private Instances getDataset() { + Instance instance = kafkaLoader.readInstance(); + if (instance != null) { + Instances ic = instance.dataset(); + ic.add(instance); + return ic; + } else { + logger.info("hasNext returned false!"); + return null; + } + + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/PrequentialSourceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/PrequentialSourceProcessor.java index 6e1598f6..a391898a 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/PrequentialSourceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/PrequentialSourceProcessor.java @@ -28,8 +28,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.EntranceProcessor; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.moa.options.AbstractOptionHandler; import org.slf4j.Logger; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/StreamSource.java b/samoa-api/src/main/java/org/apache/samoa/streams/StreamSource.java index 14684dba..491a6628 100644 --- 
a/samoa-api/src/main/java/org/apache/samoa/streams/StreamSource.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/StreamSource.java @@ -24,7 +24,7 @@ * License */ -import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.instances.Instance; import org.apache.samoa.moa.core.Example; /** diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/StreamSourceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/StreamSourceProcessor.java index 82381343..d9452a01 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/StreamSourceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/StreamSourceProcessor.java @@ -26,8 +26,8 @@ import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.Processor; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.topology.Stream; import org.slf4j.Logger; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java b/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java index bdb3e730..8523bfa7 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java @@ -21,11 +21,14 @@ */ import com.github.javacliparser.IntOption; -import org.apache.samoa.instances.*; +import org.apache.samoa.instances.Attribute; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; +import org.apache.samoa.instances.instances.SparseInstance; import org.apache.samoa.moa.core.InstanceExample; import org.apache.samoa.moa.core.ObjectRepository; import org.apache.samoa.moa.options.AbstractOptionHandler; -import 
org.apache.samoa.streams.InstanceStream; import org.apache.samoa.moa.tasks.TaskMonitor; import java.util.ArrayList; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/clustering/RandomRBFGeneratorEvents.java b/samoa-api/src/main/java/org/apache/samoa/streams/clustering/RandomRBFGeneratorEvents.java index 139b8f79..799c8602 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/clustering/RandomRBFGeneratorEvents.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/clustering/RandomRBFGeneratorEvents.java @@ -29,15 +29,14 @@ import java.util.Vector; import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.cluster.Clustering; import org.apache.samoa.moa.cluster.SphereCluster; import org.apache.samoa.moa.core.AutoExpandVector; import org.apache.samoa.moa.core.DataPoint; -import org.apache.samoa.moa.core.FastVector; import org.apache.samoa.moa.core.InstanceExample; import org.apache.samoa.moa.core.ObjectRepository; import org.apache.samoa.moa.tasks.TaskMonitor; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/generators/HyperplaneGenerator.java b/samoa-api/src/main/java/org/apache/samoa/streams/generators/HyperplaneGenerator.java index 8274040b..6ca3a3ff 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/generators/HyperplaneGenerator.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/generators/HyperplaneGenerator.java @@ -23,10 +23,10 @@ import java.util.Random; import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.DenseInstance; -import 
org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.core.Example; import org.apache.samoa.moa.core.FastVector; import org.apache.samoa.moa.core.InstanceExample; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/generators/RandomTreeGenerator.java b/samoa-api/src/main/java/org/apache/samoa/streams/generators/RandomTreeGenerator.java index 70dc4f89..4a276184 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/generators/RandomTreeGenerator.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/generators/RandomTreeGenerator.java @@ -25,10 +25,10 @@ import java.util.Random; import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.moa.core.FastVector; import org.apache.samoa.moa.core.InstanceExample; import org.apache.samoa.moa.core.ObjectRepository; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java index 231e25dc..df58600b 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java @@ -32,11 +32,14 @@ */ +import 
org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.Processor; +import org.apache.samoa.instances.kafka.KafkaSerializer; +import org.apache.samoa.learners.InstanceContentEvent; + import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.Processor; /** * Destination processor that writes data to Apache Kafka @@ -68,7 +71,7 @@ public KafkaDestinationProcessor(Properties props, String topic, KafkaSerializer this.topic = topic; this.serializer = serializer; } - + private KafkaDestinationProcessor(KafkaUtils kafkaUtils, String topic, KafkaSerializer serializer){ this.kafkaUtils = kafkaUtils; this.topic = topic; @@ -78,12 +81,15 @@ private KafkaDestinationProcessor(KafkaUtils kafkaUtils, String topic, KafkaSeri @Override public boolean process(ContentEvent event) { try { - kafkaUtils.sendKafkaMessage(topic, serializer.serialize(event)); + // Temporary solution: only InstanceContentEvent payloads are serialized; any other event falls through to return false + if(event instanceof InstanceContentEvent) { + kafkaUtils.sendKafkaMessage(topic, serializer.serialize(((InstanceContentEvent) event).getInstance())); + return true; + } } catch (Exception ex) { - Logger.getLogger(KafkaEntranceProcessor.class.getName()).log(Level.SEVERE, null, ex); + Logger.getLogger(KafkaDestinationProcessor.class.getName()).log(Level.SEVERE, null, ex); - return false; } - return true; + return false; } @Override diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java index 866a4579..18fe1823 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -34,15 +34,19 @@ */ +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.core.Processor; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.kafka.KafkaDeserializer;
+import org.apache.samoa.learners.InstanceContentEvent; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.EntranceProcessor; -import org.apache.samoa.core.Processor; /** * Entrance processor that reads incoming messages from Apache Kafka @@ -102,8 +106,11 @@ public boolean hasNext() { @Override public ContentEvent nextEvent() { - // assume this will never be called when buffer is empty! - return this.deserializer.deserialize(buffer.remove(0)); + // assume this will never be called when buffer is empty! + Instance instance = this.deserializer.deserialize(buffer.remove(0)); + // temporary solution for serialization merge + InstanceContentEvent instanceContentEvent = new InstanceContentEvent(-1, instance, true, true); + return instanceContentEvent; } @Override @@ -122,5 +129,5 @@ protected void finalize() throws Throwable { kafkaUtils.closeConsumer(); super.finalize(); } - + } diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java index fb3aef71..a848620e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -30,6 +30,11 @@ * limitations under the License. 
* #L% */ + +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.samoa.instances.kafka.KafkaConsumerThread; + import java.util.Collection; import java.util.List; import java.util.Properties; @@ -38,8 +43,6 @@ import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.ProducerRecord; /** * Internal class responsible for Kafka Stream handling (both consume and @@ -59,9 +62,9 @@ class KafkaUtils { private final Properties consumerProperties; private final Properties producerProperties; - // Timeout for Kafka Consumer + // Timeout for Kafka Consumer private long consumerTimeout; - + /** * Class constructor @@ -92,9 +95,9 @@ public KafkaUtils(Properties consumerProperties, Properties producerProperties, * * @param topics List of Kafka topics that consumer should subscribe to */ - public void initializeConsumer(Collection topics) { + public void initializeConsumer(Collection topics) { kafkaConsumerThread = new KafkaConsumerThread(consumerProperties, topics, consumerTimeout); - kafkaConsumerThread.start(); + kafkaConsumerThread.start(); } public void closeConsumer() { @@ -113,7 +116,7 @@ public void closeProducer(){ producer.close(1, TimeUnit.MINUTES); } } - + /** * Method for reading new messages from Kafka topics * @@ -135,7 +138,7 @@ public long sendKafkaMessage(String topic, byte[] message) { } catch(InterruptedException | ExecutionException | TimeoutException e){ Logger.getLogger(KafkaUtils.class.getName()).log(Level.SEVERE, null, e); } - + } return -1; } diff --git a/samoa-api/src/main/java/org/apache/samoa/tasks/KafkaTask.java
b/samoa-api/src/main/java/org/apache/samoa/tasks/KafkaTask.java index f0597a81..998bfa14 100644 --- a/samoa-api/src/main/java/org/apache/samoa/tasks/KafkaTask.java +++ b/samoa-api/src/main/java/org/apache/samoa/tasks/KafkaTask.java @@ -45,10 +45,10 @@ import com.github.javacliparser.StringOption; import java.text.SimpleDateFormat; import java.util.Date; -import org.apache.samoa.streams.kafka.KafkaDeserializer; +import org.apache.samoa.instances.kafka.KafkaDeserializer; import org.apache.samoa.streams.kafka.KafkaDestinationProcessor; import org.apache.samoa.streams.kafka.KafkaEntranceProcessor; -import org.apache.samoa.streams.kafka.KafkaSerializer; +import org.apache.samoa.instances.kafka.KafkaSerializer; /** * Kafka task diff --git a/samoa-api/src/main/resources/kafka.avsc b/samoa-api/src/main/resources/kafka.avsc index f5f12cf2..1037cc44 100644 --- a/samoa-api/src/main/resources/kafka.avsc +++ b/samoa-api/src/main/resources/kafka.avsc @@ -56,8 +56,8 @@ "name": "SingleLabelInstance", "fields": [ {"name": "weight", "type": "double"}, - {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, - {"name": "classData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} + {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.instances.InstanceData", "org.apache.samoa.instances.instances.DenseInstanceData", "org.apache.samoa.instances.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, + {"name": "classData", "type": ["null", "org.apache.samoa.instances.instances.InstanceData", "org.apache.samoa.instances.instances.DenseInstanceData", 
"org.apache.samoa.instances.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} ] }, { @@ -66,8 +66,8 @@ "name": "DenseInstance", "fields": [ {"name": "weight", "type": "double"}, - {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, - {"name": "classData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} + {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.instances.InstanceData", "org.apache.samoa.instances.instances.DenseInstanceData", "org.apache.samoa.instances.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, + {"name": "classData", "type": ["null", "org.apache.samoa.instances.instances.InstanceData", "org.apache.samoa.instances.instances.DenseInstanceData", "org.apache.samoa.instances.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} ] }, { @@ -76,8 +76,8 @@ "name": "SerializableInstance", "fields": [ {"name": "weight", "type": "double"}, - {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, - {"name": "classData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} + {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.instances.InstanceData", "org.apache.samoa.instances.instances.DenseInstanceData", 
"org.apache.samoa.instances.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, + {"name": "classData", "type": ["null", "org.apache.samoa.instances.instances.InstanceData", "org.apache.samoa.instances.instances.DenseInstanceData", "org.apache.samoa.instances.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} ] }, { diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java index 930ab236..74986ab4 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java @@ -31,6 +31,7 @@ * #L% */ import java.io.IOException; +import java.nio.charset.Charset; import java.nio.file.Files; import java.util.Arrays; import java.util.Iterator; @@ -54,7 +55,8 @@ import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.utils.Time; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.InstancesHeader; +import org.apache.samoa.instances.kafka.KafkaJsonMapper; import org.apache.samoa.learners.InstanceContentEvent; import org.junit.After; import org.junit.AfterClass; @@ -108,7 +110,7 @@ public static void setUpClass() throws IOException { } @AfterClass - public static void tearDownClass() { + public static void tearDownClass() { kafkaServer.shutdown(); zkClient.close(); zkServer.shutdown(); @@ -130,7 +132,7 @@ public void testSendingData() throws InterruptedException, ExecutionException, T final Logger logger = Logger.getLogger(KafkaDestinationProcessorTest.class.getName()); Properties props = TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); - 
KafkaDestinationProcessor kdp = new KafkaDestinationProcessor(props, TOPIC, new OosTestSerializer()); + KafkaDestinationProcessor kdp = new KafkaDestinationProcessor(props, TOPIC, new KafkaJsonMapper(Charset.forName("UTF-8"))); kdp.onCreate(1); final int[] i = {0}; diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java index 55c3b85c..bec211d7 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -34,12 +34,9 @@ * limitations under the License. * #L% */ -import com.google.gson.Gson; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutionException; @@ -47,6 +44,8 @@ import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; + +import org.apache.samoa.instances.kafka.KafkaJsonMapper; import org.apache.samoa.learners.InstanceContentEvent; import org.junit.After; import org.junit.AfterClass; @@ -67,7 +66,7 @@ import org.I0Itec.zkclient.ZkClient; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.InstancesHeader; /** * @@ -109,7 +108,7 @@ public static void setUpClass() throws IOException { Time mock = new MockTime(); kafkaServer = TestUtils.createServer(config, mock); - // create topics + // create topics AdminUtils.createTopic(zkUtils, TOPIC_OOS, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); } @@ -143,7 +142,7 @@ public void testFetchingNewData() throws InterruptedException, ExecutionExceptio 
logger.log(Level.INFO, "testFetchingNewData"); Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); - KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_OOS, TIMEOUT, new OosTestSerializer()); + KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_OOS, TIMEOUT, new KafkaJsonMapper(Charset.forName("UTF-8"))); kep.onCreate(1); @@ -155,13 +154,14 @@ public void run() { Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); - OosTestSerializer serializer = new OosTestSerializer(); + + KafkaJsonMapper kafkaJsonMapper = new KafkaJsonMapper(Charset.forName("UTF-8")); int i = 0; for (i = 0; i < NUM_INSTANCES; i++) { try { InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); - ProducerRecord record = new ProducerRecord(TOPIC_OOS, serializer.serialize(event)); + ProducerRecord record = new ProducerRecord(TOPIC_OOS, kafkaJsonMapper.serialize(event.getInstance())); long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); } catch (InterruptedException | ExecutionException | TimeoutException ex) { Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaLoaderTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaLoaderTest.java new file mode 100644 index 00000000..fe758d51 --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaLoaderTest.java @@ -0,0 +1,174 @@ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + +import kafka.admin.AdminUtils; +import kafka.admin.RackAwareMode; +import kafka.server.KafkaConfig; +import kafka.server.KafkaServer; +import kafka.utils.MockTime; +import kafka.utils.TestUtils; +import kafka.utils.ZKStringSerializer$; +import kafka.utils.ZkUtils; +import kafka.zk.EmbeddedZookeeper; +import org.I0Itec.zkclient.ZkClient; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.utils.Time; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstancesHeader; +import org.apache.samoa.instances.kafka.KafkaConsumerThread; +import org.apache.samoa.instances.kafka.KafkaDeserializer; +import org.apache.samoa.instances.kafka.KafkaJsonMapper; +import org.apache.samoa.instances.loaders.KafkaLoader; +import org.apache.samoa.learners.InstanceContentEvent; +import org.junit.*; +import org.mortbay.log.Log; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static org.junit.Assert.*; + +public class KafkaLoaderTest { + + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT 
= "9092"; + private static final String TOPIC_OOS = "samoa_test-oos"; + private static final int NUM_INSTANCES = 11111; + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + private static final int TIMEOUT = 1000; + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper + zkServer = new EmbeddedZookeeper(); + zkConnect = ZKHOST + ":" + zkServer.port(); + zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); + ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock); + + // create topics + AdminUtils.createTopic(zkUtils, TOPIC_OOS, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + + } + + @AfterClass + public static void tearDownClass() { + try { + kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } catch (Exception ex) { + Logger.getLogger(KafkaLoaderTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + + @Before + public void setUp() throws IOException { + + } + + @After + public void tearDown() { + + } + + @Test + public void testFetchingNewData() throws InterruptedException, ExecutionException, TimeoutException { + + final Logger logger = Logger.getLogger(KafkaLoaderTest.class.getName()); + logger.log(Level.INFO, "OOS"); + logger.log(Level.INFO, "testFetchingNewData"); + Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); + 
props.setProperty("auto.offset.reset", "earliest"); + KafkaLoader kafkaLoader = new KafkaLoader(); + + KafkaDeserializer kafkaDeserializer = new KafkaJsonMapper(Charset.defaultCharset()); + kafkaLoader.setDeserializer(kafkaDeserializer); + + kafkaLoader.setKafkaConsumerThread(new KafkaConsumerThread(props, Arrays.asList(TOPIC_OOS), TIMEOUT)); + kafkaLoader.runKafkaConsumerThread(); + + // prepare new thread for data producing + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST, BROKERPORT)); + + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + + KafkaJsonMapper kafkaJsonMapper = new KafkaJsonMapper(Charset.forName("UTF-8")); + int i = 0; + for (i = 0; i < NUM_INSTANCES; i++) { + try { + InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); + + ProducerRecord record = new ProducerRecord(TOPIC_OOS, kafkaJsonMapper.serialize(event.getInstance())); + long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + producer.flush(); + producer.close(); + } + }); + th.start(); + + + + int z = 0; + while (z < NUM_INSTANCES && kafkaLoader.hasNext()) { + Instance event = kafkaLoader.readInstance(); + z++; + } + kafkaLoader.close(); + + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + + } +} \ No newline at end of file diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java index 186d97bd..c1583b2c 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java @@ -60,7 +60,7 @@ 
import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.utils.Time; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.InstancesHeader; import org.junit.After; import org.junit.AfterClass; import static org.junit.Assert.*; @@ -229,9 +229,9 @@ public void testSendKafkaMessage() throws InterruptedException { Thread.sleep(2 * CONSUMER_TIMEOUT); logger.log(Level.INFO, "Get results from Kafka"); - + List consumed = new ArrayList<>(); - + while (consumed.size() != sent.size()) { ConsumerRecords records = consumer.poll(CONSUMER_TIMEOUT); Iterator> it = records.iterator(); diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java index 14535bbf..0eb7adf1 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java @@ -39,19 +39,23 @@ import java.io.ObjectOutputStream; import java.util.logging.Level; import java.util.logging.Logger; + +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.kafka.KafkaDeserializer; +import org.apache.samoa.instances.kafka.KafkaSerializer; import org.apache.samoa.learners.InstanceContentEvent; /** * * @author Piotr Wawrzyniak */ -public class OosTestSerializer implements KafkaDeserializer, KafkaSerializer { +public class OosTestSerializer implements KafkaDeserializer, KafkaSerializer { @Override - public InstanceContentEvent deserialize(byte[] message) { + public Instance deserialize(byte[] message) { try { ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(message)); - InstanceContentEvent ice = (InstanceContentEvent)ois.readObject(); + Instance ice = (Instance)ois.readObject(); return ice; } catch (IOException | 
ClassNotFoundException ex) { Logger.getLogger(OosTestSerializer.class.getName()).log(Level.SEVERE, null, ex); @@ -60,7 +64,7 @@ public InstanceContentEvent deserialize(byte[] message) { } @Override - public byte[] serialize(InstanceContentEvent message) { + public byte[] serialize(Instance message) { try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos); diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java index 89367595..58ba2b80 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java @@ -35,10 +35,10 @@ import java.util.Properties; import java.util.Random; import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; -import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.Instances; +import org.apache.samoa.instances.instances.InstancesHeader; import org.apache.samoa.learners.InstanceContentEvent; import org.apache.samoa.moa.core.FastVector; diff --git a/samoa-instances/pom.xml b/samoa-instances/pom.xml index e3b7378e..b16d4de0 100644 --- a/samoa-instances/pom.xml +++ b/samoa-instances/pom.xml @@ -40,5 +40,41 @@ avro ${avro.version} + + + + com.google.code.gson + gson + ${gson.version} + + + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + + org.apache.kafka + kafka-clients + ${kafka.version} + test + test + + + + org.apache.kafka + kafka_2.11 + ${kafka.version} + + + + org.apache.kafka + kafka_2.11 + ${kafka.version} + test + test + diff --git 
a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java index 128ace72..5a7a4f0a 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java @@ -20,6 +20,8 @@ * #L% */ +import org.apache.samoa.instances.Utils; + import java.io.Serializable; import java.text.SimpleDateFormat; import java.util.*; @@ -73,6 +75,7 @@ public class Attribute implements Serializable { */ protected SimpleDateFormat m_DateFormat; + /** * The name. */ @@ -205,7 +208,7 @@ public int numValues() { * @param value the value * @return the string */ - String formatDate(double value) { + public String formatDate(double value) { return this.m_DateFormat.format(new Date((long) value)); } @@ -214,10 +217,20 @@ String formatDate(double value) { * * @return true, if is date */ - boolean isDate() { + public boolean isDate() { return isDate; } + /** + * Get name. + * + * @return name + */ + public String getName() { + return name; + } + + /** * The values string attribute. 
*/ @@ -231,7 +244,7 @@ boolean isDate() { */ public final int indexOfValue(String value) { - if (isNominal() == false) { + if (!isNominal()) { return -1; } if (this.valuesStringAttribute == null) { @@ -246,7 +259,7 @@ public final int indexOfValue(String value) { if (val == null) { return -1; } else { - return val.intValue(); + return val; } } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java index 58ece8ee..455ec41d 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java @@ -31,6 +31,8 @@ public class AttributesInformation implements Serializable { */ protected List attributes; protected List indexValues; + + /** * The number of attributes. */ @@ -115,7 +117,7 @@ public int locateIndex(int index) { int max = this.indexValues.size() - 1; if (max == -1) { - return -1; + return - 1; } // Binary search @@ -149,4 +151,9 @@ public void setAttributes(List v, List indexValues) { this.indexValues=indexValues; } + public int getNumberAttributes() { + return numberAttributes; + } + + } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java index 0b51c56f..2172f379 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java @@ -32,7 +32,7 @@ public MultiLabelPrediction() { public MultiLabelPrediction(int numOutputAttributes) { prediction = new ArrayList< ArrayList >(); - for (int i=0; i()); } @@ -57,7 +57,7 @@ public double[] getVotes(int outputAttributeIndex) { if (prediction.size() > outputAttributeIndex) { ArrayList aux = prediction.get(outputAttributeIndex); ret = 
new double[s]; - for (int i =0;i < s;i++) { + for (int i = 0; i < s; i++) { ret[i] = aux.get(i).doubleValue(); } } @@ -82,10 +82,10 @@ public double getVote(int outputAttributeIndex, int classIndex) { @Override public void setVotes(int outputAttributeIndex, double[] votes) { - for(int i=0; i= prediction.get(outputAttributeIndex).size()) { prediction.get(outputAttributeIndex).ensureCapacity(i+1); - while (prediction.get(outputAttributeIndex).size() < i+1) { + while (prediction.get(outputAttributeIndex).size() < i + 1) { prediction.get(outputAttributeIndex).add(0.0); } } @@ -102,8 +102,8 @@ public void setVotes(double[] votes) { @Override public void setVote(int outputAttributeIndex, int classIndex, double vote) { if (outputAttributeIndex >= prediction.get(outputAttributeIndex).size()) { - prediction.get(outputAttributeIndex).ensureCapacity(classIndex+1); - while (prediction.get(outputAttributeIndex).size() < classIndex+1) { + prediction.get(outputAttributeIndex).ensureCapacity(classIndex + 1); + while (prediction.get(outputAttributeIndex).size() < classIndex + 1) { prediction.get(outputAttributeIndex).add(0.0); } } @@ -113,12 +113,12 @@ public void setVote(int outputAttributeIndex, int classIndex, double vote) { @Override public String toString(){ - StringBuffer sb= new StringBuffer(); - for (int i=0; i= (outputAttributeIndex + 1) && prediction.get(outputAttributeIndex).size() != 0; } @Override diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java index 059e912d..72c01e47 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java @@ -27,7 +27,7 @@ public interface Prediction { * * @return the number of output attributes */ - public int numOutputAttributes(); + int numOutputAttributes(); /** * Different output attributes may have different number of classes. 
@@ -35,7 +35,7 @@ public interface Prediction { * * @return the number of classes for attribute attributeIndex */ - public int numClasses(int outputAttributeIndex); + int numClasses(int outputAttributeIndex); /* * The predictions for each output attribute. @@ -50,32 +50,32 @@ public interface Prediction { * * @return the votes for a given output attribute outputAttributeIndex. */ - public double [] getVotes(int outputAttributeIndex); + double [] getVotes(int outputAttributeIndex); /** * The vote assigned to a class of an output attribute * * @return the vote for an output attribute outputAttributeIndex and a class classIndex. */ - public double getVote(int outputAttributeIndex, int classIndex); + double getVote(int outputAttributeIndex, int classIndex); /** * Sets the votes for a given output attribute * */ - public void setVotes(int outputAttributeIndex, double [] votes); + void setVotes(int outputAttributeIndex, double[] votes); /** * Sets the votes for the first output attribute * */ - public void setVotes(double[] votes); + void setVotes(double[] votes); /** * Sets the vote for class of a given output attribute * */ - public void setVote(int outputAttributeIndex, int classIndex, double vote); + void setVote(int outputAttributeIndex, int classIndex, double vote); /** * The votes for the first output attribute @@ -97,14 +97,14 @@ public interface Prediction { * * @return the votes for the first output attribute outputAttributeIndex. 
*/ - public int size(); + int size(); /** * The text of the prediction, that is the description of the values of the prediction * * @return the text */ - public String toString(); + String toString(); } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java index 52d036f1..7e245907 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java @@ -64,7 +64,7 @@ public void setRange(String range) { * Translates a single string selection into it's internal 0-based * equivalent. * - * @param single the string representing the selection (eg: 1 first last) + * @param singleSelection the string representing the selection (eg: 1 first last) * @return the number corresponding to the selected value */ protected /*@pure@*/ int rangeSingle(/*@non_null@*/String singleSelection) { @@ -83,7 +83,7 @@ public void setRange(String range) { return index; } - boolean isInRange(int value) { + public boolean isInRange(int value) { boolean ret = false; if (value >= start && value <= end) { ret = true; @@ -91,7 +91,7 @@ boolean isInRange(int value) { return ret; } - int getSelectionLength() { + public int getSelectionLength() { return end - start + 1; } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java index 778f0034..7558e770 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java @@ -50,7 +50,7 @@ public static String quote(String string) { // Enclose the string in 's if the string contains a recently added // backquote or contains one of the following characters. 
- if ((quote == true) || (string.indexOf('{') != -1) || (string.indexOf('}') != -1) || (string.indexOf(',') != -1) + if ((quote) || (string.indexOf('{') != -1) || (string.indexOf('}') != -1) || (string.indexOf(',') != -1) || (string.equals("?")) || (string.indexOf(' ') != -1) || (string.equals(""))) { string = ("'".concat(string)).concat("'"); diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/DenseInstance.java similarity index 96% rename from samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/DenseInstance.java index d8a789b1..5f9339a1 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/DenseInstance.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/DenseInstanceData.java similarity index 97% rename from samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/DenseInstanceData.java index 6781e914..9d55e64c 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/DenseInstanceData.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,7 +20,7 @@ * #L% */ -public class DenseInstanceData implements InstanceData{ +public class DenseInstanceData implements InstanceData { /** * Instantiates a new dense instance data. 
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/Instance.java similarity index 73% rename from samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/Instance.java index da4dcdd0..9a8e815b 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/Instance.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,6 +20,8 @@ * #L% */ +import org.apache.samoa.instances.Attribute; + import java.io.Serializable; public interface Instance extends Serializable { @@ -29,14 +31,14 @@ public interface Instance extends Serializable { * * @return the weight */ - public double weight(); + double weight(); /** * Sets the weight. * * @param weight the new weight */ - public void setWeight(double weight); + void setWeight(double weight); /** * Attribute. @@ -44,28 +46,28 @@ public interface Instance extends Serializable { * @param instAttIndex the inst att index * @return the attribute */ - public Attribute attribute(int instAttIndex); + Attribute attribute(int instAttIndex); /** * Delete attribute at. * * @param i the index */ - public void deleteAttributeAt(int i); + void deleteAttributeAt(int i); /** * Insert attribute at. * * @param i the index */ - public void insertAttributeAt(int i); + void insertAttributeAt(int i); /** * Gets the number of attributes. * * @return the number of attributes */ - public int numAttributes(); + int numAttributes(); /** * Adds the sparse values. 
@@ -74,14 +76,14 @@ public interface Instance extends Serializable { * @param attributeValues the attribute values * @param numberAttributes the number attributes */ - public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes); + void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes); /** * Gets the number of values, mainly for sparse instances. * * @return the number of values */ - public int numValues(); + int numValues(); /** * Gets the value of a discrete attribute as a string. @@ -89,7 +91,7 @@ public interface Instance extends Serializable { * @param i the i * @return the string */ - public String stringValue(int i); + String stringValue(int i); /** * Gets the value of an attribute. @@ -97,14 +99,14 @@ public interface Instance extends Serializable { * @param instAttIndex the inst att index * @return the double */ - public double value(int instAttIndex); + double value(int instAttIndex); /** * Sets an attribute as missing * * @param instAttIndex, the attribute's index */ - public void setMissing(int instAttIndex); + void setMissing(int instAttIndex); /** * Sets the value of an attribute. @@ -112,7 +114,7 @@ public interface Instance extends Serializable { * @param instAttIndex the index * @param value the value */ - public void setValue(int instAttIndex, double value); + void setValue(int instAttIndex, double value); /** * Checks if an attribute is missing. 
@@ -120,7 +122,7 @@ public interface Instance extends Serializable { * @param instAttIndex the inst att index * @return true, if is missing */ - public boolean isMissing(int instAttIndex); + boolean isMissing(int instAttIndex); /** * Gets the index of the attribute given the index of the array in a sparse @@ -129,7 +131,7 @@ public interface Instance extends Serializable { * @param arrayIndex the index of the array * @return the index */ - public int index(int arrayIndex); + int index(int arrayIndex); /** * Gets the value of an attribute in a sparse representation of the @@ -138,7 +140,7 @@ public interface Instance extends Serializable { * @param i the i * @return the value */ - public double valueSparse(int i); + double valueSparse(int i); /** * Checks if the attribute is missing sparse. @@ -146,98 +148,98 @@ public interface Instance extends Serializable { * @param p1 the p1 * @return true, if is missing sparse */ - public boolean isMissingSparse(int p1); + boolean isMissingSparse(int p1); /** * To double array. * * @return the double[] */ - public double[] toDoubleArray(); + double[] toDoubleArray(); /** * Class attribute. * * @return the attribute */ - public Attribute classAttribute(); + Attribute classAttribute(); /** * Class index. * * @return the int */ - public int classIndex(); + int classIndex(); /** * Class is missing. * * @return true, if successful */ - public boolean classIsMissing(); + boolean classIsMissing(); /** * Class value. * * @return the double */ - public double classValue(); + double classValue(); /** * Num classes. * * @return the int */ - public int numClasses(); + int numClasses(); /** * Sets the class value. * * @param d the new class value */ - public void setClassValue(double d); + void setClassValue(double d); /** * Copy. * * @return the instance */ - public Instance copy(); + Instance copy(); /** * Sets the dataset. 
* * @param dataset the new dataset */ - public void setDataset(Instances dataset); + void setDataset(Instances dataset); /** * Dataset. * * @return the instances */ - public Instances dataset(); + Instances dataset(); /** * Gets the number of input attributes. * * @return the number of input attributes */ - public int numInputAttributes(); + int numInputAttributes(); /** * Gets the number of output attributes. * * @return the number of output attributes */ - public int numOutputAttributes(); + int numOutputAttributes(); /** * Gets the number of output attributes. * * @return the number of output attributes */ - public int numberOutputTargets(); + int numberOutputTargets(); /** * Gets the value of an output attribute. @@ -245,7 +247,7 @@ public interface Instance extends Serializable { * @param attributeIndex the index * @return the value */ - public double classValue(int attributeIndex); + double classValue(int attributeIndex); /** * Sets the value of an output attribute. @@ -253,7 +255,7 @@ public interface Instance extends Serializable { * @param indexClass the output attribute index * @param valueAttribute the value of the attribute */ - public void setClassValue(int indexClass, double valueAttribute); + void setClassValue(int indexClass, double valueAttribute); /** * Gets an output attribute given its index. @@ -261,7 +263,7 @@ public interface Instance extends Serializable { * @param attributeIndex the index * @return the attribute */ - public Attribute outputAttribute(int attributeIndex); + Attribute outputAttribute(int attributeIndex); /** * Gets an input attribute given its index. @@ -269,7 +271,7 @@ public interface Instance extends Serializable { * @param attributeIndex the index * @return the attribute */ - public Attribute inputAttribute(int attributeIndex); + Attribute inputAttribute(int attributeIndex); /** * Gets the value of an input attribute. 
@@ -277,7 +279,7 @@ public interface Instance extends Serializable { * @param attributeIndex the index * @return the value */ - public double valueInputAttribute(int attributeIndex); + double valueInputAttribute(int attributeIndex); /** * Gets the value of an output attribute. @@ -285,7 +287,7 @@ public interface Instance extends Serializable { * @param attributeIndex the index * @return the value */ - public double valueOutputAttribute(int attributeIndex); + double valueOutputAttribute(int attributeIndex); /** * Index of an Attribute. @@ -293,7 +295,7 @@ public interface Instance extends Serializable { * @param attribute, the attribute to be found. * @return the index of an attribute */ - public int indexOfAttribute(Attribute attribute); + int indexOfAttribute(Attribute attribute); /** * Gets the value of an attribute, given the attribute. @@ -301,14 +303,14 @@ public interface Instance extends Serializable { * @param attribute the attribute * @return the double */ - public double value(Attribute attribute); + double value(Attribute attribute); /** * Sets an attribute as missing * * @param attribute, the Attribute */ - public void setMissing(Attribute attribute); + void setMissing(Attribute attribute); /** * Sets the value of an attribute. @@ -316,7 +318,7 @@ public interface Instance extends Serializable { * @param attribute, the Attribute * @param value the value */ - public void setValue(Attribute attribute, double value); + void setValue(Attribute attribute, double value); /** * Checks if an attribute is missing. 
@@ -324,7 +326,7 @@ public interface Instance extends Serializable { * @param attribute, the Attribute * @return true, if is missing */ - public boolean isMissing(Attribute attribute); + boolean isMissing(Attribute attribute); diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceData.java similarity index 79% rename from samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceData.java index b735ea5c..128cf17f 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceData.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -29,7 +29,7 @@ public interface InstanceData extends Serializable { * * @return the int */ - public int numAttributes(); + int numAttributes(); /** * Value. @@ -37,7 +37,7 @@ public interface InstanceData extends Serializable { * @param instAttIndex the inst att index * @return the double */ - public double value(int instAttIndex); + double value(int instAttIndex); /** * Checks if is missing. @@ -45,14 +45,14 @@ public interface InstanceData extends Serializable { * @param instAttIndex the inst att index * @return true, if is missing */ - public boolean isMissing(int instAttIndex); + boolean isMissing(int instAttIndex); /** * Num values. * * @return the int */ - public int numValues(); + int numValues(); /** * Index. @@ -60,7 +60,7 @@ public interface InstanceData extends Serializable { * @param i the i * @return the int */ - public int index(int i); + int index(int i); /** * Value sparse. 
@@ -68,7 +68,7 @@ public interface InstanceData extends Serializable { * @param i the i * @return the double */ - public double valueSparse(int i); + double valueSparse(int i); /** * Checks if is missing sparse. @@ -76,14 +76,14 @@ public interface InstanceData extends Serializable { * @param p1 the p1 * @return true, if is missing sparse */ - public boolean isMissingSparse(int p1); + boolean isMissingSparse(int p1); /** * To double array. * * @return the double[] */ - public double[] toDoubleArray(); + double[] toDoubleArray(); /** * Sets the value. @@ -91,7 +91,7 @@ public interface InstanceData extends Serializable { * @param m_numAttributes the m_num attributes * @param d the d */ - public void setValue(int m_numAttributes, double d); + void setValue(int m_numAttributes, double d); /** @@ -99,13 +99,13 @@ public interface InstanceData extends Serializable { * * @param index the indes */ - public void deleteAttributeAt(int index); + void deleteAttributeAt(int index); /** * Produces a shallow copy of this instance data. 
* * @return the shallow copy */ - public InstanceData copy(); + InstanceData copy(); } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceImpl.java similarity index 97% rename from samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceImpl.java index ff77dc22..10af30c0 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceImpl.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,6 +20,8 @@ * #L% */ +import org.apache.samoa.instances.*; + import java.text.SimpleDateFormat; public class InstanceImpl implements MultiLabelInstance { @@ -280,10 +282,10 @@ public int classIndex() { //return classIndex != Integer.MAX_VALUE ? classIndex : 0; // return ? 
classIndex : 0; if(classIndex == Integer.MAX_VALUE) - if(this.instanceHeader.instanceInformation.range!=null) - classIndex=instanceHeader.instanceInformation.range.getStart(); + if(this.instanceHeader.instanceInformation.range != null) + classIndex = instanceHeader.instanceInformation.range.getStart(); else - classIndex=0; + classIndex = 0; return classIndex; } @@ -334,8 +336,7 @@ public void setClassValue(double d) { */ @Override public Instance copy() { - InstanceImpl inst = new InstanceImpl(this); - return inst; + return new InstanceImpl(this); } /** diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceInformation.java similarity index 87% rename from samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceInformation.java index cfd7f518..92175104 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstanceInformation.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,6 +20,10 @@ * #L% */ +import org.apache.samoa.instances.Attribute; +import org.apache.samoa.instances.AttributesInformation; +import org.apache.samoa.instances.Range; + import java.io.Serializable; import java.util.List; @@ -65,7 +69,7 @@ public InstanceInformation(InstanceInformation chunk) { * Instantiates a new instance information. 
* * @param st the st - * @param v the v + * @param input the v */ public InstanceInformation(String st, List input) { this.relationName = st; @@ -120,7 +124,7 @@ public Attribute classAttribute() { * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#numAttributes() */ public int numAttributes() { - return this.attributesInformation.numberAttributes; + return this.attributesInformation.getNumberAttributes(); } /* (non-Javadoc) @@ -152,18 +156,18 @@ public void insertAttributeAt(Attribute attribute, int i) { } public void setAttributes(List v) { - if(this.attributesInformation==null) - this.attributesInformation= new AttributesInformation(); + if(this.attributesInformation == null) + this.attributesInformation = new AttributesInformation(); this.attributesInformation.setAttributes(v); } public int inputAttributeIndex(int index) { int ret = 0; if (classIndex == Integer.MAX_VALUE) {//Multi Label - if(index index ? index : index + 1; @@ -174,7 +178,7 @@ public int inputAttributeIndex(int index) { public int outputAttributeIndex(int attributeIndex) { int ret = 0; if (classIndex == Integer.MAX_VALUE) {//Multi Label - ret=attributeIndex+range.getStart(); //JD - Range should be a "block" + ret = attributeIndex + range.getStart(); //JD - Range should be a "block" } else { //Single Label ret = classIndex; } @@ -184,7 +188,7 @@ public int outputAttributeIndex(int attributeIndex) { public int numInputAttributes() { int ret = 0; if (classIndex == Integer.MAX_VALUE) {//Multi Label - ret=this.numAttributes()-range.getSelectionLength(); //JD + ret = this.numAttributes() - range.getSelectionLength(); //JD } else { //Single Label ret = this.numAttributes() - 1; } @@ -194,7 +198,7 @@ public int numInputAttributes() { public int numOutputAttributes() { int ret = 0; if (classIndex == Integer.MAX_VALUE) {//Multi Label - ret=range.getSelectionLength(); //JD + ret = range.getSelectionLength(); //JD } else { //Single Label ret = 1; } @@ -207,8 +211,8 @@ public void 
setRangeOutputIndices(Range range) { } public void setAttributes(List v, List indexValues) { - if(this.attributesInformation==null) - this.attributesInformation= new AttributesInformation(); + if(this.attributesInformation == null) + this.attributesInformation = new AttributesInformation(); this.attributesInformation.setAttributes(v,indexValues); } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/Instances.java similarity index 89% rename from samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/Instances.java index 04fde392..f3bb7827 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/Instances.java @@ -1,4 +1,9 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; + +import org.apache.samoa.instances.Attribute; +import org.apache.samoa.instances.Range; +import org.apache.samoa.instances.Utils; +import org.apache.samoa.instances.loaders.*; import java.io.InputStream; @@ -52,10 +57,6 @@ public class Instances implements Serializable { */ protected List instances; - protected static enum AVRO_ENCODING_FORMAT { - JSON, BINARY - } - protected int classAttribute; /** @@ -81,6 +82,17 @@ public Instances(Instances chunk) { public Instances() { } + /** + * Instantiates a new instances. + * + * @param loader proper loader object. + */ + public Instances(Loader loader) { + this.loader = loader; + this.instanceInformation = this.loader.getStructure(); + this.instances = new ArrayList<>(); + } +//@TODO CG: CLEAN UP /** * Instantiates a new instances. 
* @@ -88,11 +100,11 @@ public Instances() { * @param size the size * @param classAttribute the class attribute */ - public Instances(Reader reader, int size, int classAttribute) { - this.loader = new ArffLoader(reader, 0, classAttribute); - this.instanceInformation = loader.getStructure(); - this.instances = new ArrayList(); - } +// public Instances(Reader reader, int size, int classAttribute) { +// this.loader = new ArffLoader(reader, 0, classAttribute); +// this.instanceInformation = loader.getStructure(); +// this.instances = new ArrayList<>(); +// } /** * Instantiates a new instances. @@ -100,23 +112,23 @@ public Instances(Reader reader, int size, int classAttribute) { * @param reader the reader * @param range */ - public Instances(Reader reader, Range range) { - this.loader = new ArffLoader(reader, 0, classAttribute);//new MultiTargetArffLoader(reader, range); - this.instanceInformation = loader.getStructure(); - this.instances = new ArrayList(); - } - - public Instances(InputStream inputStream, int classAttribute, String encodingFormat) { - this.classAttribute = classAttribute; - - if (encodingFormat.equalsIgnoreCase(AVRO_ENCODING_FORMAT.BINARY.toString())) - loader = new AvroBinaryLoader(inputStream, classAttribute); - else - loader = new AvroJsonLoader(inputStream, classAttribute); - - this.instanceInformation = loader.getStructure(); - this.instances = new ArrayList<>(); - } +// public Instances(Reader reader, Range range) { +// this.loader = new ArffLoader(reader, 0, classAttribute);//new MultiTargetArffLoader(reader, range); +// this.instanceInformation = loader.getStructure(); +// this.instances = new ArrayList<>(); +// } + +// public Instances(InputStream inputStream, int classAttribute, String encodingFormat) { +// this.classAttribute = classAttribute; +// +// if (encodingFormat.equalsIgnoreCase(AVRO_ENCODING_FORMAT.BINARY.toString())) +// loader = new AvroBinaryLoader(inputStream, classAttribute); +// else +// loader = new AvroJsonLoader(inputStream, 
classAttribute); +// +// this.instanceInformation = loader.getStructure(); +// this.instances = new ArrayList<>(); +// } /** * Instantiates a new instances. @@ -429,27 +441,6 @@ public double meanOrMode(int j) { throw new UnsupportedOperationException("Not yet implemented"); //CobWeb } - /** - * Read instance. - * - * @param fileReader the file reader - * @return true, if successful - */ - public boolean readInstance(Reader fileReader) { - - //ArffReader arff = new ArffReader(reader, this, m_Lines, 1); - if (loader == null) { - loader = new ArffLoader(fileReader, 0, this.classAttribute); - } - Instance inst = loader.readInstance(); - if (inst != null) { - inst.setDataset(this); - add(inst); - return true; - } else { - return false; - } - } public boolean readInstance() { @@ -608,7 +599,7 @@ protected String stringWithoutHeader() { * @param att, the attribute. */ protected int indexOf(Attribute att) { - if (this.hsAttributesIndices == null || !this.hsAttributesIndices.containsKey(att.name)) { + if (this.hsAttributesIndices == null || !this.hsAttributesIndices.containsKey(att.getName())) { computeAttributesIndices(); } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstancesHeader.java similarity index 97% rename from samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstancesHeader.java index c32cf881..7001a94d 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/InstancesHeader.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,6 +20,8 @@ * #L% */ +import org.apache.samoa.instances.Attribute; + public class InstancesHeader extends Instances { private static final long 
serialVersionUID = 1L; diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/MultiLabelInstance.java similarity index 87% rename from samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelInstance.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/MultiLabelInstance.java index 9567c904..daf8d542 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/MultiLabelInstance.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,6 +20,8 @@ * #L% */ +import org.apache.samoa.instances.instances.Instance; + public interface MultiLabelInstance extends Instance { } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/SparseInstance.java similarity index 94% rename from samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/SparseInstance.java index a62013aa..ce6d6a40 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/SparseInstance.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -20,6 +20,8 @@ * #L% */ +import org.apache.samoa.instances.instances.InstanceImpl; + public class SparseInstance extends InstanceImpl { /** diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/SparseInstanceData.java similarity index 98% rename from 
samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/instances/SparseInstanceData.java index 77b634b9..3921af99 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/instances/SparseInstanceData.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.instances; /* * #%L @@ -262,7 +262,7 @@ public int locateIndex(int index) { /** * Deletes an attribute at the given position (0 to numAttributes() - 1). * - * @param pos the attribute's position + * @param position the attribute's position */ @Override public void deleteAttributeAt(int position) { diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaAvroMapper.java b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaAvroMapper.java new file mode 100644 index 00000000..c64f982a --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaAvroMapper.java @@ -0,0 +1,153 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.instances.kafka; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.avro.specific.SpecificRecord; +import org.apache.samoa.instances.instances.Instance; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * #%L + * SAMOA + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * Sample class for serializing and deserializing {@link Instance} + * from/to Avro format + * + * @author Jakub Jankowski + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + */ +public class KafkaAvroMapper implements KafkaDeserializer, KafkaSerializer { + + private static Logger logger = LoggerFactory.getLogger(KafkaAvroMapper.class); + + @Override + public byte[] serialize(Instance message) { + return avroSerialize(Instance.class, message); + } + + @Override + public Instance deserialize(byte[] message) { + return avroDeserialize(message, Instance.class); + } + + + /** + * Avro serialization based on specified schema + * @param cls + * @param v + * @return + */ + public static byte[] avroSerialize(final Class cls, final V v) { + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + try { + Schema schema = new Schema.Parser().parse(KafkaAvroMapper.class.getResourceAsStream("/kafka.avsc")); + DatumWriter writer; + + if (v instanceof SpecificRecord) { + writer = new SpecificDatumWriter<>(schema); + } else { + writer = new ReflectDatumWriter<>(schema); + } + + BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); + writer.write(v, binEncoder); + binEncoder.flush(); + + } catch (IOException e) { + e.printStackTrace(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + + return bout.toByteArray(); + + } + + /** + * Avro deserialization based on specified schema + * @param avroBytes + * @param clazz + * @return + */ + public static V avroDeserialize(byte[] avroBytes, Class clazz) { + V ret = null; + try { + Schema schema = new Schema.Parser().parse(KafkaAvroMapper.class.getResourceAsStream("/kafka.avsc")); + ByteArrayInputStream in = new ByteArrayInputStream(avroBytes); + DatumReader reader = new SamoaDatumReader<>(schema); + + Decoder decoder = DecoderFactory.get().directBinaryDecoder(in, null); + + ret = reader.read(null, decoder); + } catch (IOException e) { + e.printStackTrace(); + 
} catch (final Exception e) { + throw new RuntimeException(e); + } + + return ret; + } + + /** + * Avro serialization using reflection + * @param cls + * @param v + * @return + */ + public static byte[] toBytesGeneric(final Class cls, final V v) { + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + final Schema schema = ReflectData.AllowNull.get().getSchema(cls); + final DatumWriter writer = new ReflectDatumWriter(schema); + final BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); + try { + writer.write(v, binEncoder); + binEncoder.flush(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + + return bout.toByteArray(); + } + +} \ No newline at end of file diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaConsumerThread.java similarity index 89% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaConsumerThread.java index fbd3ec67..76a877d3 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaConsumerThread.java @@ -1,174 +1,171 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.samoa.streams.kafka; - -/* - * #%L - * SAMOA - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; - -/** - * - * @author pwawrzyniak - */ -class KafkaConsumerThread extends Thread { - - // Consumer class for internal use to retrieve messages from Kafka - private transient KafkaConsumer consumer; - - private Logger log = Logger.getLogger(KafkaConsumerThread.class.getName()); - - private final Properties consumerProperties; - private final Collection topics; - private final long consumerTimeout; - private final List buffer; - // used to synchronize things - private final Object lock; - private boolean running; - - /** - * Class constructor - * - * @param consumerProperties Properties of Consumer - * @param topics Topics to fetch (subscribe) - * @param consumerTimeout Timeout for data polling - */ - KafkaConsumerThread(Properties consumerProperties, Collection topics, long consumerTimeout) { - this.running = false; - this.consumerProperties = consumerProperties; - this.topics = topics; - this.consumerTimeout = consumerTimeout; - this.buffer = new 
ArrayList<>(); - lock = new Object(); - } - - @Override - public void run() { - - initializeConsumer(); - - while (running) { - fetchDataFromKafka(); - } - - cleanUp(); - } - - /** - * Method for fetching data from Apache Kafka. It takes care of received - * data - */ - private void fetchDataFromKafka() { - if (consumer != null) { - if (!consumer.subscription().isEmpty()) { - try { - List kafkaMsg = getMessagesBytes(consumer.poll(consumerTimeout)); - fillBufferAndNotifyWaits(kafkaMsg); - } catch (Throwable t) { - Logger.getLogger(KafkaConsumerThread.class.getName()).log(Level.SEVERE, null, t); - } - } - } - } - - /** - * Copies received messages to class buffer and notifies Processor to grab - * the data. - * - * @param kafkaMsg Messages received from Kafka - */ - private void fillBufferAndNotifyWaits(List kafkaMsg) { - synchronized (lock) { - buffer.addAll(kafkaMsg); - if (buffer.size() > 0) { - lock.notifyAll(); - } - } - } - - private void cleanUp() { - // clean resources - if (consumer != null) { - consumer.unsubscribe(); - consumer.close(); - } - } - - private void initializeConsumer() { - // lazy instantiation - log.log(Level.INFO, "Instantiating Kafka consumer"); - if (consumer == null) { - consumer = new KafkaConsumer<>(consumerProperties); - running = true; - } - consumer.subscribe(topics); - } - - private List getMessagesBytes(ConsumerRecords poll) { - Iterator> iterator = poll.iterator(); - List ret = new ArrayList<>(); - while (iterator.hasNext()) { - ret.add(iterator.next().value()); - } - return ret; - } - - void close() { - running = false; - } - - List getKafkaMessages() { - synchronized (lock) { - if (buffer.isEmpty()) { - try { - // block the call until new messages are received - lock.wait(); - } catch (InterruptedException ex) { - Logger.getLogger(KafkaConsumerThread.class.getName()).log(Level.SEVERE, null, ex); - } - } - ArrayList ret = new ArrayList<>(); - // copy buffer to return list - ret.addAll(buffer); - // clear message buffer - 
buffer.clear(); - return ret; - } - } -} +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.instances.kafka; + +/* + * #%L + * SAMOA + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; + +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * + * @author pwawrzyniak + */ +public class KafkaConsumerThread extends Thread { + + // Consumer class for internal use to retrieve messages from Kafka + private transient KafkaConsumer consumer; + + private Logger log = Logger.getLogger(KafkaConsumerThread.class.getName()); + + private final Properties consumerProperties; + private final Collection topics; + private final long consumerTimeout; + private final List buffer; + // used to synchronize things + private final Object lock; + private boolean running; + + /** + * Class constructor + * + * @param consumerProperties Properties of Consumer + * @param topics Topics to fetch (subscribe) + * @param consumerTimeout Timeout for data polling + */ + public KafkaConsumerThread(Properties consumerProperties, Collection topics, long consumerTimeout) { + this.running = false; + this.consumerProperties = consumerProperties; + this.topics = topics; + this.consumerTimeout = consumerTimeout; + this.buffer = new ArrayList<>(); + lock = new Object(); + } + + @Override + public void run() { + + initializeConsumer(); + + while (running) { + fetchDataFromKafka(); + } + + cleanUp(); + } + + /** + * Method for fetching data from Apache Kafka. It takes care of received + * data + */ + private void fetchDataFromKafka() { + if (consumer != null) { + if (!consumer.subscription().isEmpty()) { + try { + List kafkaMsg = getMessagesBytes(consumer.poll(consumerTimeout)); + fillBufferAndNotifyWaits(kafkaMsg); + } catch (Throwable t) { + Logger.getLogger(KafkaConsumerThread.class.getName()).log(Level.SEVERE, null, t); + } + } + } + } + + /** + * Copies received messages to class buffer and notifies Processor to grab + * the data. 
+ * + * @param kafkaMsg Messages received from Kafka + */ + private void fillBufferAndNotifyWaits(List kafkaMsg) { + synchronized (lock) { + buffer.addAll(kafkaMsg); + if (buffer.size() > 0) { + lock.notifyAll(); + } + } + } + + private void cleanUp() { + // clean resources + if (consumer != null) { + consumer.unsubscribe(); + consumer.close(); + } + } + + private void initializeConsumer() { + // lazy instantiation + log.log(Level.INFO, "Instantiating Kafka consumer"); + if (consumer == null) { + consumer = new KafkaConsumer<>(consumerProperties); + running = true; + } + consumer.subscribe(topics); + } + + private List getMessagesBytes(ConsumerRecords poll) { + Iterator> iterator = poll.iterator(); + List ret = new ArrayList<>(); + while (iterator.hasNext()) { + ret.add(iterator.next().value()); + } + return ret; + } + + public void close() { + running = false; + } + + public List getKafkaMessages() { + synchronized (lock) { + if (buffer.isEmpty()) { + try { + // block the call until new messages are received + lock.wait(); + } catch (InterruptedException ex) { + Logger.getLogger(KafkaConsumerThread.class.getName()).log(Level.SEVERE, null, ex); + } + } + ArrayList ret = new ArrayList<>(); + // copy buffer to return list + ret.addAll(buffer); + // clear message buffer + buffer.clear(); + return ret; + } + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaDeserializer.java similarity index 88% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaDeserializer.java index 459c491c..000c561e 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaDeserializer.java @@ -1,51 +1,51 @@ -/* - * Licensed under the Apache License, Version 2.0 
(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -/* - * #%L - * SAMOA - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - - -import org.apache.samoa.core.ContentEvent; - -/** - * - * @author pwawrzyniak - * @param the class that would be deserialized - */ -public interface KafkaDeserializer { - - // TODO: Consider key-value schema? - /** - * Method that provides deserialization algorithm - * @param message Message as received from Apache Kafka - * @return Deserialized form of message, to be passed to topology - */ - T deserialize(byte[] message); -} +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.instances.kafka; + +/* + * #%L + * SAMOA + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + +import org.apache.samoa.instances.instances.Instance; + +/** + * + * @author pwawrzyniak + * @param the class that would be deserialized + */ +public interface KafkaDeserializer { + + // TODO: Consider key-value schema? 
+ /** + * Method that provides deserialization algorithm + * @param message Message as received from Apache Kafka + * @return Deserialized form of message, to be passed to topology + */ + T deserialize(byte[] message); +} diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaJsonMapper.java b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaJsonMapper.java new file mode 100644 index 00000000..db9a03bb --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaJsonMapper.java @@ -0,0 +1,118 @@ +/* + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.instances.kafka; + +/* + * #%L + * SAMOA + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + + +import com.google.gson.*; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.DenseInstanceData; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstanceData; + +import java.lang.reflect.Type; +import java.nio.charset.Charset; + +/** + * Sample class for serializing and deserializing {@link Instance} + * from/to JSON format + * + * @author pwawrzyniak + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + */ +public class KafkaJsonMapper implements KafkaDeserializer, KafkaSerializer { + + private final transient Gson gson; + private final Charset charset; + + /** + * Class constructor + * + * @param charset Charset to be used for bytes parsing + */ + public KafkaJsonMapper(Charset charset) { + this.gson = new GsonBuilder().registerTypeAdapter(InstanceData.class, new InstanceDataCustomDeserializer()).registerTypeAdapter(Instance.class,new InstanceDataCreator()) + .create(); + this.charset = charset; + } + + @Override + public Instance deserialize(byte[] message) { + return gson.fromJson(new String(message, this.charset), Instance.class); + } + + @Override + public byte[] serialize(Instance message) { + return gson.toJson(message).getBytes(this.charset); + } + + + + //Unused + public class InstanceDataCreator implements InstanceCreator { + @Override + public Instance createInstance(Type type) { + return new DenseInstance(0); + } + } + + + public class InstanceDataCustomDeserializer implements JsonDeserializer { + + @Override + public InstanceData deserialize(JsonElement je, Type type, JsonDeserializationContext jdc) throws JsonParseException { + double[] attributeValues = null; + double classValues = 0.0d; + JsonObject obj = (JsonObject) je; + try { + attributeValues = jdc.deserialize(obj.get("attributeValues"), double[].class); + } catch (Exception e) { + + } + try { + classValues = jdc.deserialize(obj.get("classValue"), 
double.class); + } catch (Exception e) { + + } + if (attributeValues != null) { + DenseInstanceData did = new DenseInstanceData(attributeValues); + return did; + } else { + DenseInstanceData slid = new DenseInstanceData(); + slid.setValue(0, classValues); + return slid; + } + } + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaSerializer.java similarity index 88% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaSerializer.java index 2bbc2591..b6ad5e2b 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/KafkaSerializer.java @@ -1,52 +1,52 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -/* - * #%L - * SAMOA - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - - -import org.apache.samoa.core.ContentEvent; - -/** - * - * @author pwawrzyniak - * @param the class that would be serialized - */ -public interface KafkaSerializer { - - // TODO: Consider Key-Value schema? - - /** - * Method that provides serialization algorithm - * @param message Message received from topology, to be serialized - * @return Serialized form of the message - */ - byte[] serialize(T message); -} +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.instances.kafka; + +/* + * #%L + * SAMOA + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + +import org.apache.samoa.instances.instances.Instance; + +/** + * + * @author pwawrzyniak + * @param the class that would be serialized + */ +public interface KafkaSerializer { + + // TODO: Consider Key-Value schema? 
+ + /** + * Method that provides serialization algorithm + * @param message Message received from topology, to be serialized + * @return Serialized form of the message + */ + byte[] serialize(T message); +} diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/SamoaDatumReader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/SamoaDatumReader.java new file mode 100644 index 00000000..c5ed1efe --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/kafka/SamoaDatumReader.java @@ -0,0 +1,131 @@ +package org.apache.samoa.instances.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + + +import java.io.IOException; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.avro.generic.GenericData.Array; +import org.apache.avro.io.ResolvingDecoder; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.samoa.instances.instances.DenseInstanceData; +import org.apache.samoa.instances.instances.SparseInstanceData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * DatumReader used to read objects built with InstanceData classes + * @author Jakub Jankowski + * + * @param + */ +public class SamoaDatumReader extends ReflectDatumReader { + + private static Logger logger = LoggerFactory.getLogger(SamoaDatumReader.class); + + public SamoaDatumReader() { + super(); + } + + /** Construct for reading instances of a class. */ + public SamoaDatumReader(Class c) { + super(c); + } + + /** Construct where the writer's and reader's schemas are the same. */ + public SamoaDatumReader(Schema root) { + super(root); + } + + /** Construct given writer's and reader's schema. */ + public SamoaDatumReader(Schema writer, Schema reader) { + super(writer, reader); + } + + /** Construct given writer's and reader's schema and the data model. */ + public SamoaDatumReader(Schema writer, Schema reader, ReflectData data) { + super(writer, reader, data); + } + + /** Construct given a {@link ReflectData}. */ + public SamoaDatumReader(ReflectData data) { + super(data); + } + + @Override + /** + * Called to read a record instance. Overridden to read InstanceData. 
+ */ + protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException { + Object r = getData().newRecord(old, expected); + Object state = null; + + for (Field f : in.readFieldOrder()) { + int pos = f.pos(); + String name = f.name(); + Object oldDatum = null; + if (r instanceof DenseInstanceData) { + r = readDenseInstanceData(r, f, oldDatum, in, state); + } else if (r instanceof SparseInstanceData) { + r = readSparseInstanceData(r, f, oldDatum, in, state); + } else + readField(r, f, oldDatum, in, state); + } + + return r; + } + + private Object readDenseInstanceData(Object record, Field f, Object oldDatum, ResolvingDecoder in, Object state) + throws IOException { + if (f.name().equals("attributeValues")) { + Array atributes = (Array) read(oldDatum, f.schema(), in); + double[] atributesArr = new double[atributes.size()]; + for (int i = 0; i < atributes.size(); i++) { + atributesArr[i] = (double) atributes.get(i); + } + return new DenseInstanceData(atributesArr); + } + return null; + } + + private Object readSparseInstanceData(Object record, Field f, Object oldDatum, ResolvingDecoder in, Object state) + throws IOException { + if(f.name().equals("attributeValues")) { + Array atributes = (Array) read(oldDatum, f.schema(), in); + double[] atributesArr = new double[atributes.size()]; + for (int i = 0; i < atributes.size(); i++) + atributesArr[i] = (double) atributes.get(i); + ((SparseInstanceData)record).setAttributeValues(atributesArr); + } + if(f.name().equals("indexValues")) { + Array indexValues = (Array) read(oldDatum, f.schema(), in); + int[] indexValuesArr = new int[indexValues.size()]; + for (int i = 0; i < indexValues.size(); i++) { + indexValuesArr[i] = (int) indexValues.get(i); + } + ((SparseInstanceData)record).setIndexValues(indexValuesArr); + } + return record; + } + +} diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java 
b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/ArffLoader.java similarity index 89% rename from samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/loaders/ArffLoader.java index dd82edaa..85f5edeb 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/ArffLoader.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.loaders; /* * #%L @@ -20,6 +20,9 @@ * #L% */ +import org.apache.samoa.instances.*; +import org.apache.samoa.instances.instances.*; + import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; @@ -61,6 +64,20 @@ public ArffLoader(Reader reader, int size, int classAttribute) { } } + /** + * Instantiates a new arff loader. + * + * @param classAttribute the class attribute + */ + public ArffLoader(int classAttribute) { + if (classAttribute < 0) { + this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1); + //System.out.print(this.instanceInformation.classIndex()); + } else if (classAttribute > 0) { + this.instanceInformation.setClassIndex(classAttribute - 1); + } + } + protected Range range; /** @@ -79,28 +96,31 @@ public ArffLoader(Reader reader) { * @param range */ public ArffLoader(Reader reader, Range range) { - this.range = range; - BufferedReader br = new BufferedReader(reader); - - //Init streamTokenizer - streamTokenizer = new StreamTokenizer(br); - streamTokenizer.resetSyntax(); - streamTokenizer.whitespaceChars(0, ' '); - streamTokenizer.wordChars(' ' + 1, '\u00FF'); - streamTokenizer.whitespaceChars(',', ','); - streamTokenizer.commentChar('%'); - streamTokenizer.quoteChar('"'); - streamTokenizer.quoteChar('\''); - streamTokenizer.ordinaryChar('{'); - streamTokenizer.ordinaryChar('}'); - streamTokenizer.eolIsSignificant(true); - - 
this.instanceInformation = this.getHeader(); - - if (range != null) { //is MultiLabel - this.instanceInformation.setRangeOutputIndices(range); - } + setupStreamTokenizer(reader, range); + } + public void setupStreamTokenizer(Reader reader, Range range){ + this.range = range; + BufferedReader br = new BufferedReader(reader); + + //Init streamTokenizer + streamTokenizer = new StreamTokenizer(br); + streamTokenizer.resetSyntax(); + streamTokenizer.whitespaceChars(0, ' '); + streamTokenizer.wordChars(' ' + 1, '\u00FF'); + streamTokenizer.whitespaceChars(',', ','); + streamTokenizer.commentChar('%'); + streamTokenizer.quoteChar('"'); + streamTokenizer.quoteChar('\''); + streamTokenizer.ordinaryChar('{'); + streamTokenizer.ordinaryChar('}'); + streamTokenizer.eolIsSignificant(true); + + this.instanceInformation = this.getHeader(); + + if (range != null) { //is MultiLabel + this.instanceInformation.setRangeOutputIndices(range); + } } /** @@ -134,28 +154,6 @@ public Instance readInstance() { } - /** - * Reads instance. It detects if it is dense or sparse. - * - * @return the instance - */ - public Instance readInstance(Reader reader) { - while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) { - try { - streamTokenizer.nextToken(); - } catch (IOException ex) { - Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); - } - } - if (streamTokenizer.ttype == '{') { - return readInstanceSparse(); - // return readDenseInstanceSparse(); - } else { - return readInstanceDense(); - } - - } - /** * Reads a dense instance from the file. 
* @@ -183,8 +181,8 @@ public Instance readInstanceDense() { double value; if ("?".equals(streamTokenizer.sval)) { value = Double.NaN; //Utils.missingValue(); - } else if (isNumeric == true) { - value = Double.valueOf(streamTokenizer.sval).doubleValue(); + } else if (isNumeric) { + value = Double.valueOf(streamTokenizer.sval); } else { value = this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval); } @@ -207,7 +205,7 @@ public Instance readInstanceDense() { protected void setValue(Instance instance, int numAttribute, double value, boolean isNumber) { double valueAttribute; - if (isNumber && this.auxAttributes.get(numAttribute).isNominal) { + if (isNumber && this.auxAttributes.get(numAttribute).isNominal()) { valueAttribute = value;//this.auxAttributes.get(numAttribute).indexOfValue(Double.toString(value)); //System.out.println(value +"/"+valueAttribute+" "); @@ -265,7 +263,7 @@ private Instance readInstanceSparse() { || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) { //System.out.print(streamTokenizer.sval + "-"); if (this.auxAttributes.get(numAttribute).isNumeric()) { - this.setSparseValue(instance, indexValues, attributeValues, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true); + this.setSparseValue(instance, indexValues, attributeValues, numAttribute, Double.valueOf(streamTokenizer.sval), true); } else { this.setSparseValue(instance, indexValues, attributeValues, numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval), false); } @@ -284,8 +282,8 @@ private Instance readInstanceSparse() { int[] arrayIndexValues = new int[attributeValues.size()]; double[] arrayAttributeValues = new double[attributeValues.size()]; for (int i = 0; i < arrayIndexValues.length; i++) { - arrayIndexValues[i] = indexValues.get(i).intValue(); - arrayAttributeValues[i] = attributeValues.get(i).doubleValue(); + arrayIndexValues[i] = indexValues.get(i); + arrayAttributeValues[i] = attributeValues.get(i); } 
instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes()); return instance; @@ -294,7 +292,7 @@ private Instance readInstanceSparse() { private void setSparseValue(Instance instance, List indexValues, List attributeValues, int numAttribute, double value, boolean isNumber) { double valueAttribute; - if (isNumber && this.auxAttributes.get(numAttribute).isNominal) { + if (isNumber && this.auxAttributes.get(numAttribute).isNominal()) { valueAttribute = this.auxAttributes.get(numAttribute).indexOfValue(Double.toString(value)); } else { valueAttribute = value; @@ -342,7 +340,7 @@ private Instance readDenseInstanceSparse() { || streamTokenizer.ttype == 34)) { //System.out.print(streamTokenizer.sval + "/"+this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval)+" "); if (this.auxAttributes.get(numAttribute).isNumeric()) { - instance.setValue(numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue());//this.setValue(instance, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true); + instance.setValue(numAttribute, Double.valueOf(streamTokenizer.sval));//this.setValue(instance, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true); } else { instance.setValue(numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval));//this.setValue(instance, numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval), false); //numAttribute++; @@ -382,7 +380,7 @@ private InstanceInformation getHeader() { while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { //For each line //if (streamTokenizer.ttype == '@') { - if (streamTokenizer.ttype == StreamTokenizer.TT_WORD && streamTokenizer.sval.startsWith("@") == true) { + if (streamTokenizer.ttype == StreamTokenizer.TT_WORD && streamTokenizer.sval.startsWith("@")) { //streamTokenizer.nextToken(); String token = streamTokenizer.sval.toUpperCase(); if (token.startsWith("@RELATION")) { @@ 
-470,21 +468,18 @@ private InstanceInformation getHeader() { } protected Instance newSparseInstance(double d, double[] res) { - Instance inst = new SparseInstance(d, res); //is it dense? //inst.setInstanceInformation(this.instanceInformation); - return inst; + return new SparseInstance(d, res); } protected Instance newSparseInstance(double d) { - Instance inst = new SparseInstance(d); //inst.setInstanceInformation(this.instanceInformation); - return inst; + return new SparseInstance(d); } protected Instance newDenseInstance(int numberAttributes) { - Instance inst = new DenseInstance(numberAttributes); //inst.setInstanceInformation(this.instanceInformation); - return inst; + return new DenseInstance(numberAttributes); } private void setClassValue(Instance instance, double valueAttribute) { diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroBinaryLoader.java similarity index 90% rename from samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroBinaryLoader.java index 84b7eeca..ca4c6c90 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroBinaryLoader.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.loaders; /* * #%L @@ -26,6 +26,7 @@ import org.apache.avro.file.DataFileStream; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; +import org.apache.samoa.instances.instances.Instance; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,13 +38,12 @@ public class AvroBinaryLoader extends AvroLoader { /** Avro Binary reader for an input stream **/ protected DataFileStream dataFileStream = null; - public AvroBinaryLoader(InputStream inputStream, int 
classAttribute) { + public AvroBinaryLoader(int classAttribute) { super(classAttribute); - initializeSchema(inputStream); } /* (non-Javadoc) - * @see org.apache.samoa.instances.AvroLoader#initializeSchema(java.io.InputStream) + * @see org.apache.samoa.instances.loaders.AvroLoader#initializeSchema(java.io.InputStream) */ @Override public void initializeSchema(InputStream inputStream) @@ -69,7 +69,7 @@ public void initializeSchema(InputStream inputStream) } /* (non-Javadoc) - * @see org.apache.samoa.instances.AvroLoader#readInstance() + * @see org.apache.samoa.instances.loaders.AvroLoader#readInstance() */ @Override public Instance readInstance() { diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroJsonLoader.java similarity index 90% rename from samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroJsonLoader.java index 622347a9..dff3a062 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroJsonLoader.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.loaders; /* * #%L @@ -31,6 +31,7 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; +import org.apache.samoa.instances.instances.Instance; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,13 +43,13 @@ public class AvroJsonLoader extends AvroLoader { /** The Character reader for JSON read */ protected Reader reader = null; - public AvroJsonLoader(InputStream inputStream, int classAttribute) { + + public AvroJsonLoader( int classAttribute) { super(classAttribute); - initializeSchema(inputStream); } /* (non-Javadoc) - * @see 
org.apache.samoa.instances.AvroLoader#initializeSchema(java.io.InputStream) + * @see org.apache.samoa.instances.loaders.AvroLoader#initializeSchema(java.io.InputStream) */ @Override public void initializeSchema(InputStream inputStream) @@ -75,7 +76,7 @@ public void initializeSchema(InputStream inputStream) } /* (non-Javadoc) - * @see org.apache.samoa.instances.AvroLoader#readInstance() + * @see org.apache.samoa.instances.loaders.AvroLoader#readInstance() */ @Override public Instance readInstance() { @@ -86,7 +87,7 @@ public Instance readInstance() { try { while ((line = ((BufferedReader) reader).readLine()) != null) { - if (line == null || line.trim().length() <= 0) + if (line.trim().length() <= 0) continue; decoder = DecoderFactory.get().jsonDecoder(schema, line); diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroLoader.java similarity index 93% rename from samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroLoader.java index d3e7f274..1400eb12 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/AvroLoader.java @@ -1,4 +1,4 @@ -package org.apache.samoa.instances; +package org.apache.samoa.instances.loaders; /* * #%L @@ -26,10 +26,14 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; -import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericData.EnumSymbol; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; +import org.apache.samoa.instances.instances.DenseInstance; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstanceInformation; +import org.apache.samoa.instances.instances.SparseInstance; +import 
org.apache.samoa.instances.Attribute; public abstract class AvroLoader implements Loader { @@ -74,7 +78,7 @@ protected Instance readInstanceDense(GenericRecord record) int numAttribute = 0; for (Attribute attribute : attributes) { - Object value = record.get(attribute.name); + Object value = record.get(attribute.getName()); boolean isNumeric = attributes.get(numAttribute).isNumeric(); boolean isNominal = attributes.get(numAttribute).isNominal(); @@ -88,14 +92,14 @@ else if (value instanceof Long) else if (value instanceof Integer) this.setDenseValue(instance, numAttribute, (int) value); else - throw new RuntimeException("Invalid data type in the Avro data for Numeric Type : " + attribute.name); + throw new RuntimeException("Invalid data type in the Avro data for Numeric Type : " + attribute.getName()); } else if (isNominal) { double valueAttribute; if (!(value instanceof EnumSymbol)) - throw new RuntimeException("Invalid data type in the Avro data for Nominal Type : " + attribute.name); + throw new RuntimeException("Invalid data type in the Avro data for Nominal Type : " + attribute.getName()); EnumSymbol enumSymbolalue = (EnumSymbol) value; @@ -145,7 +149,7 @@ protected Instance readInstanceSparse(GenericRecord record) { for (Attribute attribute : attributes) { numAttribute++; - Object value = record.get(attribute.name); + Object value = record.get(attribute.getName()); boolean isNumeric = attributes.get(numAttribute).isNumeric(); boolean isNominal = attributes.get(numAttribute).isNominal(); @@ -167,14 +171,14 @@ else if (value instanceof Long) else if (value instanceof Integer) this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (int) value); else - throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name); + throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.getName()); } else if (isNominal) { double valueAttribute; if (!(value instanceof EnumSymbol)) - throw new 
RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name); + throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.getName()); EnumSymbol enumSymbolalue = (EnumSymbol) value; @@ -194,8 +198,8 @@ else if (isNominal) double[] arrayAttributeValues = new double[attributeValues.size()]; for (int i = 0; i < arrayIndexValues.length; i++) { - arrayIndexValues[i] = indexValues.get(i).intValue(); - arrayAttributeValues[i] = attributeValues.get(i).doubleValue(); + arrayIndexValues[i] = indexValues.get(i); + arrayAttributeValues[i] = attributeValues.get(i); } instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes()); diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/KafkaLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/KafkaLoader.java new file mode 100644 index 00000000..57395d10 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/KafkaLoader.java @@ -0,0 +1,81 @@ +package org.apache.samoa.instances.loaders; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + + + +import org.apache.samoa.instances.kafka.KafkaConsumerThread; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstanceInformation; +import org.apache.samoa.instances.kafka.KafkaDeserializer; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class KafkaLoader implements Loader { + + private KafkaConsumerThread kafkaConsumerThread; + private List buffer = new ArrayList<>(100); + private KafkaDeserializer deserializer; + + public void setDeserializer(KafkaDeserializer deserializer) { + this.deserializer = deserializer; + } + + public void setKafkaConsumerThread(KafkaConsumerThread kafkaConsumerThread) { + this.kafkaConsumerThread = kafkaConsumerThread; + } + + public void runKafkaConsumerThread(){ + kafkaConsumerThread.start(); + } + + @Override + public InstanceInformation getStructure() { + return null; + } + + @Override + public Instance readInstance() { + if(hasNext()) + return deserializer.deserialize(buffer.remove(0)); + return null; + } + + + public void close(){ + kafkaConsumerThread.close(); + } + + public boolean hasNext() { + if (buffer.isEmpty()) { + try { + buffer.addAll(kafkaConsumerThread.getKafkaMessages()); + } catch (Exception ex) { + Logger.getLogger(KafkaLoader.class.getName()).log(Level.SEVERE, null, ex); + } + } + return buffer.size() > 0; + } + +} diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/Loader.java similarity index 79% rename from samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java rename to samoa-instances/src/main/java/org/apache/samoa/instances/loaders/Loader.java index 65835f13..a246a14b 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/Loader.java @@ -1,4 +1,4 @@ 
-package org.apache.samoa.instances; +package org.apache.samoa.instances.loaders; /* * #%L @@ -20,6 +20,9 @@ * #L% */ +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstanceInformation; + import java.io.Serializable; public interface Loader extends Serializable { @@ -29,13 +32,13 @@ public interface Loader extends Serializable { * * @return InstanceInformation */ - public InstanceInformation getStructure(); + InstanceInformation getStructure(); /** * Read a single instance from the Stream * * @return Instance */ - public Instance readInstance(); + Instance readInstance(); } diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/LoaderFactory.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/LoaderFactory.java new file mode 100644 index 00000000..2dd0d256 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/LoaderFactory.java @@ -0,0 +1,43 @@ +package org.apache.samoa.instances.loaders; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L%
+ */
+
+/** Creates the {@link Loader} implementation that corresponds to a {@link LoaderType}. */
+public class LoaderFactory {
+
+    /** Returns a new loader for {@code loaderType}, or null when no case below constructs one. */
+    public Loader createLoader(LoaderType loaderType, int classAttribute){
+        switch (loaderType){
+            case AVRO_JSON_LOADER:
+                return new AvroJsonLoader(classAttribute);
+            case AVRO_BINARY_LOADER:
+                return new AvroBinaryLoader(classAttribute);
+            case ARFF_LOADER:
+                return new ArffLoader(classAttribute);
+            case KAFKA_LOADER:
+                return new KafkaLoader(); // classAttribute is not passed to this loader
+            // NOTE(review): JSON_LOADER has no case here, so it also reaches the null return below.
+            case UNKNOWN:
+                break;
+        }
+        return null;
+    }
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/LoaderType.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/LoaderType.java
new file mode 100644
index 00000000..e099b5b1
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/LoaderType.java
@@ -0,0 +1,45 @@
+package org.apache.samoa.instances.loaders;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2017 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+/** Identifies which loader implementation is requested; UNKNOWN marks an unrecognized name. */
+public enum LoaderType {
+    AVRO_BINARY_LOADER("AVRO_BINARY_LOADER"),
+    AVRO_JSON_LOADER("AVRO_JSON_LOADER"),
+    JSON_LOADER("JSON_LOADER"),
+    ARFF_LOADER("ARFF_LOADER"),
+    KAFKA_LOADER("KAFKA_LOADER"),
+    UNKNOWN("UNKNOWN");
+
+    private final String loaderName;
+
+    LoaderType(String loaderName) {
+        this.loaderName = loaderName; // keep the label rather than discarding the constructor argument
+    }
+
+    /** Case-insensitive lookup by constant name; returns UNKNOWN for null or unrecognized input. */
+    public static LoaderType fromString(String str) {
+        for (LoaderType v : LoaderType.values()) {
+            if (v.toString().equalsIgnoreCase(str)) return v;
+        }
+        return UNKNOWN;
+    }
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiTargetArffLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/MultiTargetArffLoader.java
similarity index 82%
rename from samoa-instances/src/main/java/org/apache/samoa/instances/MultiTargetArffLoader.java
rename to samoa-instances/src/main/java/org/apache/samoa/instances/loaders/MultiTargetArffLoader.java
index 0e9a8fac..59f60d57 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiTargetArffLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/loaders/MultiTargetArffLoader.java
@@ -1,4 +1,4 @@
-package org.apache.samoa.instances;
+package org.apache.samoa.instances.loaders;
 
 /*
  * #%L
@@ -20,6 +20,11 @@
  * #L%
  */
 
+import org.apache.samoa.instances.instances.DenseInstance;
+import org.apache.samoa.instances.instances.Instance;
+import org.apache.samoa.instances.Range;
+import org.apache.samoa.instances.instances.SparseInstance;
+
 import java.io.Reader;
 
 public class MultiTargetArffLoader extends ArffLoader {
diff --git a/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java b/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java
index d34baa93..fd4f69da 100644
--- a/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java
+++ b/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java
@@ -20,9 +20,9 @@
  * #L%
  */
 
-import org.apache.samoa.instances.ArffLoader;
-import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.InstanceInformation; +import org.apache.samoa.instances.instances.Instance; +import org.apache.samoa.instances.instances.InstanceInformation; +import org.apache.samoa.instances.loaders.ArffLoader; import org.junit.Before; import org.junit.Test; @@ -96,7 +96,7 @@ public void testGetHeader() { @Test public void testReadInstance() { - Instance instance = loader.readInstance(reader); + Instance instance = loader.readInstance(); assertEquals(1065.731934, instance.value(0), 0); assertEquals(0, instance.value(1), 0); assertEquals(0, instance.value(2), 0);