diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/EmailClean.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/EmailClean.scala
index b73f7e4..db46831 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/EmailClean.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/EmailClean.scala
@@ -10,7 +10,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
 import org.apache.spark.sql.SparkSession
 
 class EmailClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean email format data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/IdentityNumberClean.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/IdentityNumberClean.scala
index b2db537..b40cc36 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/IdentityNumberClean.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/IdentityNumberClean.scala
@@ -14,7 +14,7 @@ import org.apache.spark.sql.SparkSession
 import scala.reflect.macros.ParseException
 
 class IdentityNumberClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean Id Card data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/PhoneNumberClean.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/PhoneNumberClean.scala
index a81b73d..0b5ac32 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/PhoneNumberClean.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/PhoneNumberClean.scala
@@ -8,7 +8,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
 import org.apache.spark.sql.SparkSession
 
 class PhoneNumberClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean phone number format data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/TitleClean.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/TitleClean.scala
index c045e7e..52acd9e 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/TitleClean.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/clean/TitleClean.scala
@@ -8,7 +8,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
 import org.apache.spark.sql.SparkSession
 
 class TitleClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean title format data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/file/FetchFile.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/file/FetchFile.scala
index 11e1ec0..19a6ba9 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/file/FetchFile.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/file/FetchFile.scala
@@ -10,7 +10,7 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 
 class FetchFile extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Fetch file from hdfs to local."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/file/PutFile.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/file/PutFile.scala
index 0990938..be2b256 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/file/PutFile.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/file/PutFile.scala
@@ -13,10 +13,10 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.spark.sql.SparkSession
 
 class PutFile extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Put local file to hdfs."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
-  val outportList: List[String] = List(PortEnum.NonePort.toString)
+  val outportList: List[String] = List(PortEnum.DefaultPort.toString)
   var hdfs_path:String =_
   var local_path:String=_
   var fs:FileSystem=null
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/file/RegexTextProcess.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/file/RegexTextProcess.scala
index 76d2aa2..b2b2056 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/file/RegexTextProcess.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/file/RegexTextProcess.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.SparkSession
 
 
 class RegexTextProcess extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Use regex to replace text."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/http/LoadZipFromUrl.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/http/LoadZipFromUrl.scala
index f7fbe46..711c1e6 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/http/LoadZipFromUrl.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/http/LoadZipFromUrl.scala
@@ -10,7 +10,7 @@ import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
 import org.apache.spark.sql.SparkSession
 
 class LoadZipFromUrl extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "DownLoad zip file by http."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnGZip.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnGZip.scala
index 0a99e9b..744c1b8 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnGZip.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnGZip.scala
@@ -12,7 +12,7 @@ import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInp
 import org.apache.spark.sql.{DataFrame, SparkSession}
 
 class UnGZip extends ConfigurableStop {
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Unzip tar.gz, tar, gz file."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/ReadFromKafka.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/ReadFromKafka.scala
index 508e873..80ce7b9 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/ReadFromKafka.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/ReadFromKafka.scala
@@ -99,5 +99,5 @@ class ReadFromKafka extends ConfigurableStop{
     List(StopGroupEnum.KafkaGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/WriteToKafka.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/WriteToKafka.scala
index 3fd94fd..3ae4275 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/WriteToKafka.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/kafka/WriteToKafka.scala
@@ -82,5 +82,5 @@ class WriteToKafka extends ConfigurableStop{
     List(StopGroupEnum.KafkaGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreePrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreePrediction.scala
index e433bd3..4f79793 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreePrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreePrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class DecisionTreePrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist DecisionTreeModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreeTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreeTraining.scala
index f66f569..49c69f7 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreeTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/DecisionTreeTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassifier
 import org.apache.spark.sql.SparkSession
 
 class DecisionTreeTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a DecisionTreeModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -95,10 +95,10 @@ class DecisionTreeTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("ddd").defaultValue("").required(true)
-    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(true)
-    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(true)
-    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(true)
-    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(true)
+    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(true)
+    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(true)
+    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(true)
+    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(true)
     val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(true)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTPrediction.scala
index a15f56d..563b031 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.GBTClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class GBTPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist GBT Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTTraining.scala
index 3825708..733d490 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/GBTTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.GBTClassifier
 import org.apache.spark.sql.SparkSession
 
 class GBTTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a GBT Model."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -118,14 +118,14 @@ class GBTTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("").defaultValue("").required(true)
-    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(false)
-    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(false)
-    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(false)
-    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(false)
+    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(false)
+    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(false)
+    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(false)
+    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(false)
     val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(false)
-    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("ddd").defaultValue("").required(false)
-    val lossType=new PropertyDescriptor().name("lossType").displayName("LOSS_TYPE").description("ddd").defaultValue("").required(false)
-    val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("ddd").defaultValue("").required(false)
+    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("Fraction of the training data used for learning each decision tree, in range (0, 1].").defaultValue("").required(false)
+    val lossType=new PropertyDescriptor().name("lossType").displayName("LOSS_TYPE").description("Loss function which GBT tries to minimize. (case-insensitive) Supported: \"logistic\" (default = logistic)").defaultValue("").required(false)
+    val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("Param for Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of each estimator. (default = 0.1)").defaultValue("").required(false)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
     descriptor = maxBins :: descriptor
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionPrediction.scala
index 1a0c8b7..1c8eb27 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.LogisticRegressionModel
 import org.apache.spark.sql.SparkSession
 
 class LogisticRegressionPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist LogisticRegressionModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionTraining.scala
index f459b86..e3584c2 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/LogisticRegressionTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.ml.classification.LogisticRegression
 
 class LogisticRegressionTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a LogisticRegressionModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -104,12 +104,12 @@ class LogisticRegressionTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("ddd").defaultValue("").required(true)
-    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("ddd").defaultValue("").required(true)
-    val minTol=new PropertyDescriptor().name("minTol").displayName("MIN_TOL").description("ddd").defaultValue("").required(true)
-    val regParam=new PropertyDescriptor().name("regParam").displayName("REG_PARAM").description("ddd").defaultValue("").required(true)
-    val elasticNetParam=new PropertyDescriptor().name("elasticNetParam").displayName("ELASTIC_NET_PARAM").description("ddd").defaultValue("").required(true)
-    val threshold=new PropertyDescriptor().name("threshold").displayName("THRESHOLD").description("ddd").defaultValue("").required(true)
-    val family=new PropertyDescriptor().name("family").displayName("FAMILY").description("ddd").defaultValue("").required(true)
+    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("Param for maximum number of iterations").defaultValue("").required(false)
+    val minTol=new PropertyDescriptor().name("minTol").displayName("MIN_TOL").description("Param for the convergence tolerance for iterative algorithms (>= 0)").defaultValue("").required(false)
+    val regParam=new PropertyDescriptor().name("regParam").displayName("REG_PARAM").description("Param for regularization parameter (>= 0)").defaultValue("").required(false)
+    val elasticNetParam=new PropertyDescriptor().name("elasticNetParam").displayName("ELASTIC_NET_PARAM").description("Param for the ElasticNet mixing parameter, in range [0, 1]").defaultValue("").required(false)
+    val threshold=new PropertyDescriptor().name("threshold").displayName("THRESHOLD").description("Param for threshold in binary classification prediction, in range [0, 1]").defaultValue("").required(false)
+    val family=new PropertyDescriptor().name("family").displayName("FAMILY").description("Param for the name of family which is a description of the label distribution to be used in the model").defaultValue("").required(false)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
     descriptor = maxIter :: descriptor
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/MultilayerPerceptronPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/MultilayerPerceptronPrediction.scala
index b2cece2..be08832 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/MultilayerPerceptronPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/MultilayerPerceptronPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.MultilayerPerceptronClassificationMode
 import org.apache.spark.sql.SparkSession
 
 class MultilayerPerceptronPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist MultilayerPerceptronModel to predict."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesPrediction.scala
index eebebd6..09b8195 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.NaiveBayesModel
 import org.apache.spark.sql.SparkSession
 
 class NaiveBayesPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist NaiveBayesModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesTraining.scala
index 09b07a6..8b77940 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/NaiveBayesTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.NaiveBayes
 import org.apache.spark.sql.SparkSession
 
 class NaiveBayesTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a NaiveBayesModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestPrediction.scala
index a67b66b..dc5898c 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.RandomForestClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class RandomForestPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist RandomForest Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestTraining.scala
index 36ab2ba..948b6d7 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_classification/RandomForestTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.RandomForestClassifier
 import org.apache.spark.sql.SparkSession
 
 class RandomForestTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a RandomForestModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -118,14 +118,14 @@ class RandomForestTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("").defaultValue("").required(true)
-    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(false)
-    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(false)
-    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(false)
-    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(false)
+    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(false)
+    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(false)
+    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(false)
+    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(false)
     val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(false)
-    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("ddd").defaultValue("").required(false)
-    val featureSubsetStrategy=new PropertyDescriptor().name("featureSubsetStrategy").displayName("FEATURE_SUBSET_STRATEGY").description("ddd").defaultValue("").required(false)
-    val numTrees=new PropertyDescriptor().name("numTrees").displayName("NUM_TREES").description("ddd").defaultValue("").required(false)
+    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("Fraction of the training data used for learning each decision tree, in range (0, 1].").defaultValue("").required(false)
+    val featureSubsetStrategy=new PropertyDescriptor().name("featureSubsetStrategy").displayName("FEATURE_SUBSET_STRATEGY").description("The number of features to consider for splits at each tree node.").defaultValue("").required(false)
+    val numTrees=new PropertyDescriptor().name("numTrees").displayName("NUM_TREES").description("Number of trees to train (>= 1).").defaultValue("").required(false)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
     descriptor = maxBins :: descriptor
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansPrediction.scala
index 635fad0..b5e4d54 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.BisectingKMeansModel
 import org.apache.spark.sql.SparkSession
 
 class BisectingKMeansPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist BisectingKMeans Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansTraining.scala
index 0c35aa6..a0f41bf 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/BisectingKMeansTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.BisectingKMeans
 import org.apache.spark.sql.SparkSession
 
 class BisectingKMeansTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "BisectingKMeans clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixturePrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixturePrediction.scala
index 987b14f..400a0c7 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixturePrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixturePrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.GaussianMixtureModel
 import org.apache.spark.sql.SparkSession
 
 class GaussianMixturePrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist GaussianMixture Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixtureTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixtureTraining.scala
index 9236476..0e20cff 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixtureTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/GaussianMixtureTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.GaussianMixture
 import org.apache.spark.sql.SparkSession
 
 class GaussianMixtureTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "GaussianMixture clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansPrediction.scala
index 8265e52..3121ec3 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.KMeansModel
 import org.apache.spark.sql.SparkSession
 
 class KmeansPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist KmeansModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansTraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansTraining.scala
index 3da4ffc..a675eb2 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansTraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/KmeansTraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.KMeans
 import org.apache.spark.sql.SparkSession
 
 class KmeansTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Kmeans clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDAPrediction.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDAPrediction.scala
index af30588..1f23226 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDAPrediction.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDAPrediction.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.{DistributedLDAModel, LDAModel, LocalLDAMo
 import org.apache.spark.sql.SparkSession
 
 class LDAPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist LDAModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDATraining.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDATraining.scala
index bf07c62..ce3168d 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDATraining.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_clustering/LDATraining.scala
@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.LDA
 import org.apache.spark.sql.SparkSession
 
 class LDATraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "LDA clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_feature/WordToVec.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_feature/WordToVec.scala
index 883cb24..ca4d921 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_feature/WordToVec.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ml_feature/WordToVec.scala
@@ -9,7 +9,7 @@ import org.apache.spark.ml.feature.Word2VecModel
 import org.apache.spark.sql.SparkSession
 
 class WordToVec extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "transfer word to vector"
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -104,12 +104,12 @@ class WordToVec extends ConfigurableStop{
 
   override def getPropertyDescriptor(): List[PropertyDescriptor] = {
     var descriptor : List[PropertyDescriptor] = List()
-    val vectorSize = new PropertyDescriptor().name("vectorSize").displayName("VECTOR_SIZE").defaultValue("").required(false)
-    val maxSentenceLength = new PropertyDescriptor().name("maxSentenceLength").displayName("MAX_SENTENCE_LENGTH").description("").defaultValue("").required(false)
-    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("").defaultValue("").required(false)
-    val minCount=new PropertyDescriptor().name("minCount").displayName("MIN_COUNT").description("").defaultValue("").required(false)
+    val vectorSize = new PropertyDescriptor().name("vectorSize").displayName("VECTOR_SIZE").description("The dimension of the code that you want to transform from words. Default: 100").defaultValue("").required(false)
+    val maxSentenceLength = new PropertyDescriptor().name("maxSentenceLength").displayName("MAX_SENTENCE_LENGTH").description("Sets the maximum length (in words) of each sentence in the input data. Any sentence longer than this threshold will be divided into chunks of up to maxSentenceLength size. Default: 1000").defaultValue("").required(false)
+    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("Param for maximum number of iterations (>= 0)").defaultValue("").required(false)
+    val minCount=new PropertyDescriptor().name("minCount").displayName("MIN_COUNT").description("The minimum number of times a token must appear to be included in the word2vec model's vocabulary. Default: 5").defaultValue("").required(false)
     val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").defaultValue("").required(false)
-    val numPartitions=new PropertyDescriptor().name("numPartitions").displayName("NUM_PARTITIONS").description("").defaultValue("").required(false)
+    val numPartitions=new PropertyDescriptor().name("numPartitions").displayName("NUM_PARTITIONS").description("Number of partitions for sentences of words. Default: 1").defaultValue("").required(false)
     val colName=new PropertyDescriptor().name("colName").displayName("INPUT_COL").description("").defaultValue("").required(true)
     val outputCol=new PropertyDescriptor().name("outputCol").displayName("OUTPUT_COL").description("").defaultValue("").required(true)
     descriptor = vectorSize :: descriptor
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/ReadFromRedis.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/ReadFromRedis.scala
index 749a64d..03196f9 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/ReadFromRedis.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/ReadFromRedis.scala
@@ -97,5 +97,5 @@ class ReadFromRedis extends ConfigurableStop{
     List(StopGroupEnum.RedisGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/WriteToRedis.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/WriteToRedis.scala
index 8833b9a..e11f7fe 100644
--- a/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/WriteToRedis.scala
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/redis/WriteToRedis.scala
@@ -69,5 +69,5 @@ class WriteToRedis extends ConfigurableStop{
     List(StopGroupEnum.RedisGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }