forked from opensci/piflow

alter email and add description for properties

This commit is contained in:
parent e97fac9d84
commit d02f3da4ee
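Every hunk in this commit makes one or both of the same two changes across piflow's bundled stops: the authorEmail constant moves from xiaoxiao@cnic.cn to 06whuxx@163.com, and placeholder PropertyDescriptor descriptions ("ddd" or "") are replaced with meaningful text. For orientation, a minimal sketch of the stop skeleton being edited follows; ExampleStop is a hypothetical name, and the ConfigurableStop and PropertyDescriptor member signatures are inferred from the hunks below rather than quoted from the full piflow sources.

class ExampleStop extends ConfigurableStop {
  // The two class-level constants this commit touches on every stop:
  val authorEmail: String = "06whuxx@163.com"    // maintainer contact, altered here
  val description: String = "One-line summary of what the stop does."

  // Ports: NonePort for sources/sinks, DefaultPort for pass-through stops.
  val inportList: List[String] = List(PortEnum.DefaultPort.toString)
  val outportList: List[String] = List(PortEnum.DefaultPort.toString)

  // Properties use a fluent builder and are prepended to a list; this
  // commit fills in the .description(...) calls that were "ddd" or empty.
  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val maxDepth = new PropertyDescriptor()
      .name("maxDepth")
      .displayName("MAX_DEPTH")
      .description("Maximum depth of the tree")
      .defaultValue("")
      .required(false)
    descriptor = maxDepth :: descriptor
    descriptor
  }
}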
@@ -10,7 +10,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
 import org.apache.spark.sql.SparkSession
 
 class EmailClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean email format data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -14,7 +14,7 @@ import org.apache.spark.sql.SparkSession
 import scala.reflect.macros.ParseException
 
 class IdentityNumberClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean Id Card data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
 import org.apache.spark.sql.SparkSession
 
 class PhoneNumberClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean phone number format data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
 import org.apache.spark.sql.SparkSession
 
 class TitleClean extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Clean title format data."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -10,7 +10,7 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 
 class FetchFile extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Fetch file from hdfs to local."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -13,10 +13,10 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.spark.sql.SparkSession
 
 class PutFile extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Put local file to hdfs."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
-  val outportList: List[String] = List(PortEnum.NonePort.toString)
+  val outportList: List[String] = List(PortEnum.DefaultPort.toString)
   var hdfs_path:String = _
   var local_path:String = _
   var fs:FileSystem = null

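PutFile keeps a FileSystem handle next to hdfs_path and local_path, but its perform logic is outside this diff. Assuming it is a plain HDFS upload, the core of "Put local file to hdfs" would reduce to a sketch like the following; putToHdfs is a hypothetical helper, while the Hadoop calls are the standard FileSystem API.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Hypothetical sketch of what the stop's upload step amounts to; the
// actual PutFile.perform() is not shown in this commit.
def putToHdfs(localPath: String, hdfsPath: String): Unit = {
  val conf = new Configuration()  // picks up core-site.xml / hdfs-site.xml
  val fs = FileSystem.get(conf)   // handle to the configured default filesystem
  fs.copyFromLocalFile(new Path(localPath), new Path(hdfsPath)) // upload
  fs.close()
}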
@@ -9,7 +9,7 @@ import org.apache.spark.sql.SparkSession
 
 
 class RegexTextProcess extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Use regex to replace text."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -10,7 +10,7 @@ import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
 import org.apache.spark.sql.SparkSession
 
 class LoadZipFromUrl extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "DownLoad zip file by http."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -12,7 +12,7 @@ import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
 import org.apache.spark.sql.{DataFrame, SparkSession}
 
 class UnGZip extends ConfigurableStop {
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Unzip tar.gz, tar, gz file."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -99,5 +99,5 @@ class ReadFromKafka extends ConfigurableStop{
     List(StopGroupEnum.KafkaGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }

@@ -82,5 +82,5 @@ class WriteToKafka extends ConfigurableStop{
     List(StopGroupEnum.KafkaGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class DecisionTreePrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist DecisionTreeModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassifier
 import org.apache.spark.sql.SparkSession
 
 class DecisionTreeTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a DecisionTreeModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -95,10 +95,10 @@ class DecisionTreeTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("ddd").defaultValue("").required(true)
-    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(true)
-    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(true)
-    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(true)
-    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(true)
+    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(true)
+    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(true)
+    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(true)
+    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(true)
     val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(true)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.GBTClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class GBTPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist GBT Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.GBTClassifier
 import org.apache.spark.sql.SparkSession
 
 class GBTTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a GBT Model."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -118,14 +118,14 @@ class GBTTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("").defaultValue("").required(true)
-    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(false)
-    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(false)
-    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(false)
-    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(false)
+    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(false)
+    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(false)
+    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(false)
+    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(false)
     val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(false)
-    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("ddd").defaultValue("").required(false)
-    val lossType=new PropertyDescriptor().name("lossType").displayName("LOSS_TYPE").description("ddd").defaultValue("").required(false)
-    val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("ddd").defaultValue("").required(false)
+    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("Fraction of the training data used for learning each decision tree, in range (0, 1].").defaultValue("").required(false)
+    val lossType=new PropertyDescriptor().name("lossType").displayName("LOSS_TYPE").description("Loss function which GBT tries to minimize. (case-insensitive) Supported: \"logistic\" (default = logistic)").defaultValue("").required(false)
+    val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("Param for Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of each estimator. (default = 0.1)").defaultValue("").required(false)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
     descriptor = maxBins :: descriptor

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.LogisticRegressionModel
 import org.apache.spark.sql.SparkSession
 
 class LogisticRegressionPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist LogisticRegressionModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.ml.classification.LogisticRegression
 
 class LogisticRegressionTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a LogisticRegressionModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -104,12 +104,12 @@ class LogisticRegressionTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
     val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("ddd").defaultValue("").required(true)
-    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("ddd").defaultValue("").required(true)
-    val minTol=new PropertyDescriptor().name("minTol").displayName("MIN_TOL").description("ddd").defaultValue("").required(true)
-    val regParam=new PropertyDescriptor().name("regParam").displayName("REG_PARAM").description("ddd").defaultValue("").required(true)
-    val elasticNetParam=new PropertyDescriptor().name("elasticNetParam").displayName("ELASTIC_NET_PARAM").description("ddd").defaultValue("").required(true)
-    val threshold=new PropertyDescriptor().name("threshold").displayName("THRESHOLD").description("ddd").defaultValue("").required(true)
-    val family=new PropertyDescriptor().name("family").displayName("FAMILY").description("ddd").defaultValue("").required(true)
+    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("Param for maximum number of iterations").defaultValue("").required(false)
+    val minTol=new PropertyDescriptor().name("minTol").displayName("MIN_TOL").description("Param for the convergence tolerance for iterative algorithms (>= 0)").defaultValue("").required(false)
+    val regParam=new PropertyDescriptor().name("regParam").displayName("REG_PARAM").description("Param for regularization parameter (>= 0)").defaultValue("").required(false)
+    val elasticNetParam=new PropertyDescriptor().name("elasticNetParam").displayName("ELASTIC_NET_PARAM").description("Param for the ElasticNet mixing parameter, in range [0, 1]").defaultValue("").required(false)
+    val threshold=new PropertyDescriptor().name("threshold").displayName("THRESHOLD").description("Param for threshold in binary classification prediction, in range [0, 1]").defaultValue("").required(false)
+    val family=new PropertyDescriptor().name("family").displayName("FAMILY").description("Param for the name of family which is a description of the label distribution to be used in the model").defaultValue("").required(false)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
     descriptor = maxIter :: descriptor

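The descriptor names above track the params of org.apache.spark.ml.classification.LogisticRegression one for one, with minTol presumably feeding setTol; the quoted ranges ((>= 0), [0, 1]) are Spark's documented constraints. A sketch of the mapping the training stop would perform, with placeholder values rather than piflow defaults:

import org.apache.spark.ml.classification.LogisticRegression

// Illustrative only: shows where each descriptor would land on the Spark
// ML estimator; the values here are placeholders, not piflow defaults.
val lr = new LogisticRegression()
  .setMaxIter(100)          // maxIter: maximum number of iterations
  .setTol(1e-6)             // minTol: convergence tolerance (>= 0)
  .setRegParam(0.0)         // regParam: regularization parameter (>= 0)
  .setElasticNetParam(0.0)  // elasticNetParam: L1/L2 mixing, in [0, 1]
  .setThreshold(0.5)        // threshold: binary prediction threshold, in [0, 1]
  .setFamily("auto")        // family: "auto", "binomial" or "multinomial"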
@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class MultilayerPerceptronPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist MultilayerPerceptronModel to predict."
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.NaiveBayesModel
 import org.apache.spark.sql.SparkSession
 
 class NaiveBayesPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist NaiveBayesModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.NaiveBayes
 import org.apache.spark.sql.SparkSession
 
 class NaiveBayesTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a NaiveBayesModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.RandomForestClassificationModel
 import org.apache.spark.sql.SparkSession
 
 class RandomForestPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist RandomForest Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.RandomForestClassifier
 import org.apache.spark.sql.SparkSession
 
 class RandomForestTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Training a RandomForestModel."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -118,14 +118,14 @@ class RandomForestTraining extends ConfigurableStop{
     var descriptor : List[PropertyDescriptor] = List()
    val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
     val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("").defaultValue("").required(true)
-    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(false)
-    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(false)
-    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(false)
-    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(false)
+    val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(false)
+    val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(false)
+    val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(false)
+    val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(false)
     val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(false)
-    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("ddd").defaultValue("").required(false)
-    val featureSubsetStrategy=new PropertyDescriptor().name("featureSubsetStrategy").displayName("FEATURE_SUBSET_STRATEGY").description("ddd").defaultValue("").required(false)
-    val numTrees=new PropertyDescriptor().name("numTrees").displayName("NUM_TREES").description("ddd").defaultValue("").required(false)
+    val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("Fraction of the training data used for learning each decision tree, in range (0, 1].").defaultValue("").required(false)
+    val featureSubsetStrategy=new PropertyDescriptor().name("featureSubsetStrategy").displayName("FEATURE_SUBSET_STRATEGY").description("The number of features to consider for splits at each tree node.").defaultValue("").required(false)
+    val numTrees=new PropertyDescriptor().name("numTrees").displayName("NUM_TREES").description("Number of trees to train (>= 1).").defaultValue("").required(false)
     descriptor = training_data_path :: descriptor
     descriptor = model_save_path :: descriptor
     descriptor = maxBins :: descriptor

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.BisectingKMeansModel
 import org.apache.spark.sql.SparkSession
 
 class BisectingKMeansPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist BisectingKMeans Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.BisectingKMeans
 import org.apache.spark.sql.SparkSession
 
 class BisectingKMeansTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "BisectingKMeans clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.GaussianMixtureModel
 import org.apache.spark.sql.SparkSession
 
 class GaussianMixturePrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist GaussianMixture Model to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.GaussianMixture
 import org.apache.spark.sql.SparkSession
 
 class GaussianMixtureTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "GaussianMixture clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.KMeansModel
 import org.apache.spark.sql.SparkSession
 
 class KmeansPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist KmeansModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.KMeans
 import org.apache.spark.sql.SparkSession
 
 class KmeansTraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Kmeans clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.{DistributedLDAModel, LDAModel, LocalLDAModel}
 import org.apache.spark.sql.SparkSession
 
 class LDAPrediction extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "Make use of a exist LDAModel to predict."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.LDA
 import org.apache.spark.sql.SparkSession
 
 class LDATraining extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "LDA clustering."
   val inportList: List[String] = List(PortEnum.NonePort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -9,7 +9,7 @@ import org.apache.spark.ml.feature.Word2VecModel
 import org.apache.spark.sql.SparkSession
 
 class WordToVec extends ConfigurableStop{
-  val authorEmail: String = "xiaoxiao@cnic.cn"
+  val authorEmail: String = "06whuxx@163.com"
   val description: String = "transfer word to vector"
   val inportList: List[String] = List(PortEnum.DefaultPort.toString)
   val outportList: List[String] = List(PortEnum.DefaultPort.toString)

@@ -104,12 +104,12 @@ class WordToVec extends ConfigurableStop{
 
   override def getPropertyDescriptor(): List[PropertyDescriptor] = {
     var descriptor : List[PropertyDescriptor] = List()
-    val vectorSize = new PropertyDescriptor().name("vectorSize").displayName("VECTOR_SIZE").defaultValue("").required(false)
-    val maxSentenceLength = new PropertyDescriptor().name("maxSentenceLength").displayName("MAX_SENTENCE_LENGTH").description("").defaultValue("").required(false)
-    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("").defaultValue("").required(false)
-    val minCount=new PropertyDescriptor().name("minCount").displayName("MIN_COUNT").description("").defaultValue("").required(false)
+    val vectorSize = new PropertyDescriptor().name("vectorSize").displayName("VECTOR_SIZE").description("The dimension of the code that you want to transform from words. Default: 100").defaultValue("").required(false)
+    val maxSentenceLength = new PropertyDescriptor().name("maxSentenceLength").displayName("MAX_SENTENCE_LENGTH").description("Sets the maximum length (in words) of each sentence in the input data. Any sentence longer than this threshold will be divided into chunks of up to maxSentenceLength size. Default: 1000").defaultValue("").required(false)
+    val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("Param for maximum number of iterations (>= 0)").defaultValue("").required(false)
+    val minCount=new PropertyDescriptor().name("minCount").displayName("MIN_COUNT").description("The minimum number of times a token must appear to be included in the word2vec model's vocabulary. Default: 5").defaultValue("").required(false)
     val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").defaultValue("").required(false)
-    val numPartitions=new PropertyDescriptor().name("numPartitions").displayName("NUM_PARTITIONS").description("").defaultValue("").required(false)
+    val numPartitions=new PropertyDescriptor().name("numPartitions").displayName("NUM_PARTITIONS").description("Number of partitions for sentences of words. Default: 1").defaultValue("").required(false)
     val colName=new PropertyDescriptor().name("colName").displayName("INPUT_COL").description("").defaultValue("").required(true)
     val outputCol=new PropertyDescriptor().name("outputCol").displayName("OUTPUT_COL").description("").defaultValue("").required(true)
     descriptor = vectorSize :: descriptor

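These names likewise match the setters of Spark ML's Word2Vec estimator, which is where the quoted defaults (vectorSize 100, minCount 5, maxSentenceLength 1000) come from. An illustrative configuration using Spark's own defaults and placeholder column names:

import org.apache.spark.ml.feature.Word2Vec

// Illustrative mapping of the descriptors above onto Spark ML's Word2Vec;
// the values are Spark's defaults, and the column names are placeholders.
val w2v = new Word2Vec()
  .setVectorSize(100)          // vectorSize: dimension of the learned vectors
  .setMaxSentenceLength(1000)  // maxSentenceLength: longer sentences are chunked
  .setMaxIter(1)               // maxIter: number of training iterations
  .setMinCount(5)              // minCount: vocabulary frequency cutoff
  .setStepSize(0.025)          // stepSize: learning rate per iteration
  .setNumPartitions(1)         // numPartitions: partitions for sentence data
  .setInputCol("words")        // colName / INPUT_COL
  .setOutputCol("vectors")     // outputCol / OUTPUT_COL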
@@ -97,5 +97,5 @@ class ReadFromRedis extends ConfigurableStop{
     List(StopGroupEnum.RedisGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }

@@ -69,5 +69,5 @@ class WriteToRedis extends ConfigurableStop{
     List(StopGroupEnum.RedisGroup.toString)
   }
 
-  override val authorEmail: String = "xiaoxiao@cnic.cn"
+  override val authorEmail: String = "06whuxx@163.com"
 }