Alter author email and add descriptions for properties

xiaoxiao 2018-11-22 17:23:29 +08:00
parent e97fac9d84
commit d02f3da4ee
33 changed files with 63 additions and 63 deletions

View File

@@ -10,7 +10,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.sql.SparkSession
class EmailClean extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Clean email format data."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
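
Every Stop in this commit repeats the same metadata contract, which is why the identical two-line change recurs file after file below. A minimal sketch of that contract, using only the API visible in these diffs (MyStop is hypothetical, the package path is assumed, and the remaining ConfigurableStop members are elided):

import cn.piflow.conf.{ConfigurableStop, PortEnum}  // package path assumed from usage in these diffs

class MyStop extends ConfigurableStop {
  val authorEmail: String = "06whuxx@163.com"
  val description: String = "A real one-sentence summary, not a placeholder."
  val inportList: List[String] = List(PortEnum.DefaultPort.toString)
  val outportList: List[String] = List(PortEnum.DefaultPort.toString)
  // setProperties, getPropertyDescriptor, initialize, perform, etc. elided
}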

View File

@@ -14,7 +14,7 @@ import org.apache.spark.sql.SparkSession
import scala.reflect.macros.ParseException
class IdentityNumberClean extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Clean Id Card data."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.sql.SparkSession
class PhoneNumberClean extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Clean phone number format data."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.sql.SparkSession
class TitleClean extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Clean title format data."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -10,7 +10,7 @@ import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
class FetchFile extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Fetch file from hdfs to local."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -13,10 +13,10 @@ import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.SparkSession
class PutFile extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Put local file to hdfs."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
var hdfs_path:String = _
var local_path:String = _
var fs:FileSystem=null
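
The perform body is outside this hunk; as orientation for what hdfs_path, local_path, and fs are for, a hedged sketch with the standard Hadoop client API (the paths and defaultFS address are hypothetical):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

val conf = new Configuration()
conf.set("fs.defaultFS", "hdfs://namenode:9000")  // hypothetical cluster address
val fs: FileSystem = FileSystem.get(conf)
// PutFile copies local -> HDFS; FetchFile above is the reverse, via copyToLocalFile.
fs.copyFromLocalFile(new Path("/tmp/in.csv"), new Path("/user/piflow/in.csv"))
fs.close()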

View File

@@ -9,7 +9,7 @@ import org.apache.spark.sql.SparkSession
class RegexTextProcess extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Use regex to replace text."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -10,7 +10,7 @@ import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.spark.sql.SparkSession
class LoadZipFromUrl extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "DownLoad zip file by http."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -12,7 +12,7 @@ import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.spark.sql.{DataFrame, SparkSession}
class UnGZip extends ConfigurableStop {
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Unzip tar.gz, tar, gz file."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -99,5 +99,5 @@ class ReadFromKafka extends ConfigurableStop{
List(StopGroupEnum.KafkaGroup.toString)
}
override val authorEmail: String = "xiaoxiao@cnic.cn"
override val authorEmail: String = "06whuxx@163.com"
}

View File

@@ -82,5 +82,5 @@ class WriteToKafka extends ConfigurableStop{
List(StopGroupEnum.KafkaGroup.toString)
}
override val authorEmail: String = "xiaoxiao@cnic.cn"
override val authorEmail: String = "06whuxx@163.com"
}

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.sql.SparkSession
class DecisionTreePrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist DecisionTreeModel to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.sql.SparkSession
class DecisionTreeTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Training a DecisionTreeModel."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -95,10 +95,10 @@ class DecisionTreeTraining extends ConfigurableStop{
var descriptor : List[PropertyDescriptor] = List()
val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("The path where the trained model is saved.").defaultValue("").required(true)
val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(true)
val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(true)
val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(true)
val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(true)
val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(true)
val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(true)
val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(true)
val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(true)
val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(true)
descriptor = training_data_path :: descriptor
descriptor = model_save_path :: descriptor
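
The new description strings follow Spark ML's parameter docs. As a reading aid, a hedged sketch of how these properties line up with DecisionTreeClassifier setters (values shown are Spark's defaults; the real wiring lives in code outside this hunk):

import org.apache.spark.ml.classification.DecisionTreeClassifier

val dt = new DecisionTreeClassifier()
  .setMaxBins(32)             // MAX_BINS
  .setMaxDepth(5)             // MAX_DEPTH
  .setMinInfoGain(0.0)        // MIN_INFO_GAIN
  .setMinInstancesPerNode(1)  // MIN_INSTANCES_PER_NODE
  .setImpurity("gini")        // IMPURITY: "entropy" or "gini"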

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.sql.SparkSession
class GBTPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist GBT Model to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.GBTClassifier
import org.apache.spark.sql.SparkSession
class GBTTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Training a GBT Model."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -118,14 +118,14 @@ class GBTTraining extends ConfigurableStop{
var descriptor : List[PropertyDescriptor] = List()
val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("").defaultValue("").required(true)
val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(false)
val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(false)
val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(false)
val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(false)
val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(false)
val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(false)
val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(false)
val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(false)
val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(false)
val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("ddd").defaultValue("").required(false)
val lossType=new PropertyDescriptor().name("lossType").displayName("LOSS_TYPE").description("ddd").defaultValue("").required(false)
val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("ddd").defaultValue("").required(false)
val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("Fraction of the training data used for learning each decision tree, in range (0, 1].").defaultValue("").required(false)
val lossType=new PropertyDescriptor().name("lossType").displayName("LOSS_TYPE").description("Loss function which GBT tries to minimize. (case-insensitive) Supported: \"logistic\" (default = logistic)").defaultValue("").required(false)
val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("Param for Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of each estimator. (default = 0.1)").defaultValue("").required(false)
descriptor = training_data_path :: descriptor
descriptor = model_save_path :: descriptor
descriptor = maxBins :: descriptor
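
Same pattern for the GBT properties; a hedged sketch against GBTClassifier with Spark defaults (note Spark's setter spells it setSubsamplingRate, while the property here is subSamplingRate):

import org.apache.spark.ml.classification.GBTClassifier

val gbt = new GBTClassifier()
  .setMaxBins(32)
  .setMaxDepth(5)
  .setMinInfoGain(0.0)
  .setMinInstancesPerNode(1)
  .setSubsamplingRate(1.0)  // SUB_SAMPLING_RATE, in (0, 1]
  .setLossType("logistic")  // LOSS_TYPE: "logistic" only
  .setStepSize(0.1)         // STEP_SIZE, in (0, 1]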

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.LogisticRegressionModel
import org.apache.spark.sql.SparkSession
class LogisticRegressionPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist LogisticRegressionModel to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.ml.classification.LogisticRegression
class LogisticRegressionTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Training a LogisticRegressionModel."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -104,12 +104,12 @@ class LogisticRegressionTraining extends ConfigurableStop{
var descriptor : List[PropertyDescriptor] = List()
val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("The path where the trained model is saved.").defaultValue("").required(true)
val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("ddd").defaultValue("").required(true)
val minTol=new PropertyDescriptor().name("minTol").displayName("MIN_TOL").description("ddd").defaultValue("").required(true)
val regParam=new PropertyDescriptor().name("regParam").displayName("REG_PARAM").description("ddd").defaultValue("").required(true)
val elasticNetParam=new PropertyDescriptor().name("elasticNetParam").displayName("ELASTIC_NET_PARAM").description("ddd").defaultValue("").required(true)
val threshold=new PropertyDescriptor().name("threshold").displayName("THRESHOLD").description("ddd").defaultValue("").required(true)
val family=new PropertyDescriptor().name("family").displayName("FAMILY").description("ddd").defaultValue("").required(true)
val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("Param for maximum number of iterations").defaultValue("").required(false)
val minTol=new PropertyDescriptor().name("minTol").displayName("MIN_TOL").description("Param for the convergence tolerance for iterative algorithms (>= 0)").defaultValue("").required(false)
val regParam=new PropertyDescriptor().name("regParam").displayName("REG_PARAM").description("Param for regularization parameter (>= 0)").defaultValue("").required(false)
val elasticNetParam=new PropertyDescriptor().name("elasticNetParam").displayName("ELASTIC_NET_PARAM").description("Param for the ElasticNet mixing parameter, in range [0, 1]").defaultValue("").required(false)
val threshold=new PropertyDescriptor().name("threshold").displayName("THRESHOLD").description("Param for threshold in binary classification prediction, in range [0, 1]").defaultValue("").required(false)
val family=new PropertyDescriptor().name("family").displayName("FAMILY").description("Param for the name of family which is a description of the label distribution to be used in the model").defaultValue("").required(false)
descriptor = training_data_path :: descriptor
descriptor = model_save_path :: descriptor
descriptor = maxIter :: descriptor
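
For orientation, a hedged sketch of the matching LogisticRegression setters with Spark defaults (piflow's minTol corresponds to Spark's tol parameter; the wiring itself is outside this hunk):

import org.apache.spark.ml.classification.LogisticRegression

val lr = new LogisticRegression()
  .setMaxIter(100)          // MAX_ITER
  .setTol(1e-6)             // MIN_TOL -> Spark's convergence tolerance "tol"
  .setRegParam(0.0)         // REG_PARAM (>= 0)
  .setElasticNetParam(0.0)  // ELASTIC_NET_PARAM in [0, 1]: 0 = L2, 1 = L1
  .setThreshold(0.5)        // THRESHOLD in [0, 1]
  .setFamily("auto")        // FAMILY: "auto", "binomial", or "multinomial"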

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.sql.SparkSession
class MultilayerPerceptronPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist MultilayerPerceptronModel to predict."
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.NaiveBayesModel
import org.apache.spark.sql.SparkSession
class NaiveBayesPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist NaiveBayesModel to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.NaiveBayes
import org.apache.spark.sql.SparkSession
class NaiveBayesTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Training a NaiveBayesModel."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.sql.SparkSession
class RandomForestPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist RandomForest Model to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.sql.SparkSession
class RandomForestTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Training a RandomForestModel."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -118,14 +118,14 @@ class RandomForestTraining extends ConfigurableStop{
var descriptor : List[PropertyDescriptor] = List()
val training_data_path = new PropertyDescriptor().name("training_data_path").displayName("TRAINING_DATA_PATH").defaultValue("").required(true)
val model_save_path = new PropertyDescriptor().name("model_save_path").displayName("MODEL_SAVE_PATH").description("").defaultValue("").required(true)
val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("ddd").defaultValue("").required(false)
val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("ddd").defaultValue("").required(false)
val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("ddd").defaultValue("").required(false)
val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("ddd").defaultValue("").required(false)
val maxBins=new PropertyDescriptor().name("maxBins").displayName("MAX_BINS").description("Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.").defaultValue("").required(false)
val maxDepth=new PropertyDescriptor().name("maxDepth").displayName("MAX_DEPTH").description("Maximum depth of the tree").defaultValue("").required(false)
val minInfoGain=new PropertyDescriptor().name("minInfoGain").displayName("MIN_INFO_GAIN").description("Minimum information gain for a split to be considered at a tree node").defaultValue("").required(false)
val minInstancesPerNode=new PropertyDescriptor().name("minInstancesPerNode").displayName("MIN_INSTANCES_PER_NODE").description("Minimum number of instances each child must have after split.").defaultValue("").required(false)
val impurity=new PropertyDescriptor().name("impurity").displayName("IMPURITY").description("Criterion used for information gain calculation (case-insensitive). Supported: \"entropy\" and \"gini\". (default = gini)").defaultValue("").required(false)
val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("ddd").defaultValue("").required(false)
val featureSubsetStrategy=new PropertyDescriptor().name("featureSubsetStrategy").displayName("FEATURE_SUBSET_STRATEGY").description("ddd").defaultValue("").required(false)
val numTrees=new PropertyDescriptor().name("numTrees").displayName("NUM_TREES").description("ddd").defaultValue("").required(false)
val subSamplingRate=new PropertyDescriptor().name("subSamplingRate").displayName("SUB_SAMPLING_RATE").description("Fraction of the training data used for learning each decision tree, in range (0, 1].").defaultValue("").required(false)
val featureSubsetStrategy=new PropertyDescriptor().name("featureSubsetStrategy").displayName("FEATURE_SUBSET_STRATEGY").description("The number of features to consider for splits at each tree node.").defaultValue("").required(false)
val numTrees=new PropertyDescriptor().name("numTrees").displayName("NUM_TREES").description("Number of trees to train (>= 1).").defaultValue("").required(false)
descriptor = training_data_path :: descriptor
descriptor = model_save_path :: descriptor
descriptor = maxBins :: descriptor
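
A hedged sketch of the corresponding RandomForestClassifier setters with Spark defaults (illustrative only; the actual use of these values is outside this hunk):

import org.apache.spark.ml.classification.RandomForestClassifier

val rf = new RandomForestClassifier()
  .setMaxBins(32)
  .setMaxDepth(5)
  .setMinInfoGain(0.0)
  .setMinInstancesPerNode(1)
  .setImpurity("gini")
  .setSubsamplingRate(1.0)          // SUB_SAMPLING_RATE in (0, 1]
  .setFeatureSubsetStrategy("auto") // FEATURE_SUBSET_STRATEGY: "auto", "all", "sqrt", "log2", "onethird"
  .setNumTrees(20)                  // NUM_TREES (>= 1)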

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.BisectingKMeansModel
import org.apache.spark.sql.SparkSession
class BisectingKMeansPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist BisectingKMeans Model to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.BisectingKMeans
import org.apache.spark.sql.SparkSession
class BisectingKMeansTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "BisectingKMeans clustering."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.GaussianMixtureModel
import org.apache.spark.sql.SparkSession
class GaussianMixturePrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist GaussianMixture Model to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.GaussianMixture
import org.apache.spark.sql.SparkSession
class GaussianMixtureTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "GaussianMixture clustering."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.KMeansModel
import org.apache.spark.sql.SparkSession
class KmeansPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Make use of a exist KmeansModel to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.sql.SparkSession
class KmeansTraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "Kmeans clustering."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.{DistributedLDAModel, LDAModel, LocalLDAModel}
import org.apache.spark.sql.SparkSession
class LDAPrediction extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com
val description: String = "Make use of a exist LDAModel to predict."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -8,7 +8,7 @@ import org.apache.spark.ml.clustering.LDA
import org.apache.spark.sql.SparkSession
class LDATraining extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "LDA clustering."
val inportList: List[String] = List(PortEnum.NonePort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

View File

@@ -9,7 +9,7 @@ import org.apache.spark.ml.feature.Word2VecModel
import org.apache.spark.sql.SparkSession
class WordToVec extends ConfigurableStop{
val authorEmail: String = "xiaoxiao@cnic.cn"
val authorEmail: String = "06whuxx@163.com"
val description: String = "transfer word to vector"
val inportList: List[String] = List(PortEnum.DefaultPort.toString)
val outportList: List[String] = List(PortEnum.DefaultPort.toString)
@@ -104,12 +104,12 @@ class WordToVec extends ConfigurableStop{
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val vectorSize = new PropertyDescriptor().name("vectorSize").displayName("VECTOR_SIZE").defaultValue("").required(false)
val maxSentenceLength = new PropertyDescriptor().name("maxSentenceLength").displayName("MAX_SENTENCE_LENGTH").description("").defaultValue("").required(false)
val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("").defaultValue("").required(false)
val minCount=new PropertyDescriptor().name("minCount").displayName("MIN_COUNT").description("").defaultValue("").required(false)
val vectorSize = new PropertyDescriptor().name("vectorSize").displayName("VECTOR_SIZE").description("The dimension of the code that you want to transform from words. Default: 100").defaultValue("").required(false)
val maxSentenceLength = new PropertyDescriptor().name("maxSentenceLength").displayName("MAX_SENTENCE_LENGTH").description("Sets the maximum length (in words) of each sentence in the input data. Any sentence longer than this threshold will be divided into chunks of up to maxSentenceLength size. Default: 1000").defaultValue("").required(false)
val maxIter=new PropertyDescriptor().name("maxIter").displayName("MAX_ITER").description("Param for maximum number of iterations (>= 0)").defaultValue("").required(false)
val minCount=new PropertyDescriptor().name("minCount").displayName("MIN_COUNT").description("The minimum number of times a token must appear to be included in the word2vec model's vocabulary. Default: 5").defaultValue("").required(false)
val stepSize=new PropertyDescriptor().name("stepSize").displayName("STEP_SIZE").description("Param for Step size to be used for each iteration of optimization (> 0).").defaultValue("").required(false)
val numPartitions=new PropertyDescriptor().name("numPartitions").displayName("NUM_PARTITIONS").description("").defaultValue("").required(false)
val numPartitions=new PropertyDescriptor().name("numPartitions").displayName("NUM_PARTITIONS").description("Number of partitions for sentences of words (> 0). Default: 1").defaultValue("").required(false)
val colName=new PropertyDescriptor().name("colName").displayName("INPUT_COL").description("").defaultValue("").required(true)
val outputCol=new PropertyDescriptor().name("outputCol").displayName("OUTPUT_COL").description("").defaultValue("").required(true)
descriptor = vectorSize :: descriptor
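
These properties map one-to-one onto Spark ML's Word2Vec estimator; a hedged sketch with Spark defaults as placeholder values (column names are hypothetical):

import org.apache.spark.ml.feature.Word2Vec

val w2v = new Word2Vec()
  .setVectorSize(100)          // VECTOR_SIZE, default 100
  .setMaxSentenceLength(1000)  // MAX_SENTENCE_LENGTH, default 1000
  .setMaxIter(1)               // MAX_ITER, default 1
  .setMinCount(5)              // MIN_COUNT, default 5
  .setStepSize(0.025)          // STEP_SIZE (> 0), default 0.025
  .setNumPartitions(1)         // NUM_PARTITIONS (> 0), default 1
  .setInputCol("words")        // INPUT_COL: a column of Seq[String]
  .setOutputCol("result")      // OUTPUT_COL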

View File

@@ -97,5 +97,5 @@ class ReadFromRedis extends ConfigurableStop{
List(StopGroupEnum.RedisGroup.toString)
}
override val authorEmail: String = "xiaoxiao@cnic.cn"
override val authorEmail: String = "06whuxx@163.com"
}

View File

@@ -69,5 +69,5 @@ class WriteToRedis extends ConfigurableStop{
List(StopGroupEnum.RedisGroup.toString)
}
override val authorEmail: String = "xiaoxiao@cnic.cn"
override val authorEmail: String = "06whuxx@163.com"
}