Merge remote-tracking branch 'origin/master'
commit 259da94bbe
@@ -0,0 +1,19 @@
{
  "flow":{
    "name":"mockData",
    "uuid":"1234",
    "stops":[
      {
        "uuid":"0000",
        "name":"MockData",
        "bundle":"cn.piflow.bundle.common.MockData",
        "properties":{
          "schema": "name:String:true, age:Int, weight:Double, totalMoney:Float, isStudent:Boolean",
          "count": "10"
        }
      }

    ],
    "paths":[]
  }
}
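As a reading aid only: based on the type mapping implemented in MockData.scala further down in this commit, the "schema" property above should expand to the following Spark schema (a sketch written for this note, not generated output; each entry follows column:columnType:isNullable, with nullability defaulting to false):

    import org.apache.spark.sql.types._

    // "name:String:true, age:Int, weight:Double, totalMoney:Float, isStudent:Boolean"
    val mockSchema = StructType(Array(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = false),
      StructField("weight", DoubleType, nullable = false),
      StructField("totalMoney", FloatType, nullable = false),
      StructField("isStudent", BooleanType, nullable = false)
    ))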
Binary file not shown (new image, 31 KiB).
@@ -0,0 +1,36 @@
{
  "flow":{
    "name":"scalaTest",
    "uuid":"1234567890",
    "stops":[
      {
        "uuid":"1111",
        "name":"CsvParser",
        "bundle":"cn.piflow.bundle.csv.CsvParser",
        "properties":{
          "csvPath":"hdfs://10.0.88.13:9000/xjzhu/test.csv",
          "header":"false",
          "delimiter":",",
          "schema":"title,author"
        }
      },
      {
        "uuid":"2222",
        "name":"ExecuteScala",
        "bundle":"cn.piflow.bundle.script.ExecuteScala",
        "properties":{
          "script":"import sys\nimport os\n\nimport numpy as np\nfrom scipy import linalg\nimport pandas as pd\n\nimport matplotlib\nmatplotlib.use('Agg')\n\n\ndef listFunction(dictInfo):\n\n return dictInfo",
          "execFunction": "listFunction"
        }
      }
    ],
    "paths":[
      {
        "from":"CsvParser",
        "outport":"",
        "inport":"",
        "to":"ExecuteScala"
      }
    ]
  }
}
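For orientation, the CsvParser properties above roughly correspond to the following plain Spark read, assuming the CsvParser stop wraps Spark's CSV data source (its implementation is not part of this diff, so treat this as a sketch; the HDFS path is the one from the flow JSON):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("csvSketch").getOrCreate()

    // header=false, delimiter ",", schema "title,author" read as two string columns
    val df = spark.read
      .option("header", "false")
      .option("delimiter", ",")
      .schema("title STRING, author STRING")
      .csv("hdfs://10.0.88.13:9000/xjzhu/test.csv")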
@@ -0,0 +1,136 @@
package cn.piflow.bundle.common

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types._
import org.json4s
import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods._

import scala.util.Random

class MockData extends ConfigurableStop {

  override val authorEmail: String = "xjzhu@cnic.cn"
  override val description: String = "Mock dataframe."
  override val inportList: List[String] = List(Port.NonePort)
  override val outportList: List[String] = List(Port.DefaultPort)

  var schema: String = _
  var count: Int = _

  override def setProperties(map: Map[String, Any]): Unit = {
    schema = MapUtil.get(map, "schema").asInstanceOf[String]
    count = MapUtil.get(map, "count").asInstanceOf[String].toInt
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()

    val schema = new PropertyDescriptor()
      .name("schema")
      .displayName("Schema")
      .description("The schema of the mock data. Each field is given as column:columnType:isNullable. " +
        "columnType can be String/Int/Long/Float/Double/Boolean. " +
        "isNullable can be left blank; the default value is false.")
      .defaultValue("")
      .required(true)
      .example("id:String,name:String,age:Int")
    descriptor = schema :: descriptor

    val count = new PropertyDescriptor()
      .name("count")
      .displayName("Count")
      .description("The number of rows in the mock dataframe")
      .defaultValue("")
      .required(true)
      .example("10")
    descriptor = count :: descriptor

    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/common/MockData.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.CommonGroup)
  }

  override def initialize(ctx: ProcessContext): Unit = {}

  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    // Parse the schema string (column:columnType[:isNullable], comma separated) into Spark StructFields.
    val fields = this.schema.split(",").map(_.trim)

    val structFieldArray: Array[StructField] = new Array[StructField](fields.length)
    for (i <- fields.indices) {
      val columnInfo = fields(i).split(":")
      val column = columnInfo(0)
      val columnType = columnInfo(1)
      var isNullable = false
      if (columnInfo.length == 3) {
        isNullable = columnInfo(2).toBoolean
      }
      columnType match {
        case "String" => structFieldArray(i) = StructField(column, StringType, isNullable)
        case "Int" => structFieldArray(i) = StructField(column, IntegerType, isNullable)
        case "Double" => structFieldArray(i) = StructField(column, DoubleType, isNullable)
        case "Float" => structFieldArray(i) = StructField(column, FloatType, isNullable)
        case "Long" => structFieldArray(i) = StructField(column, LongType, isNullable)
        case "Boolean" => structFieldArray(i) = StructField(column, BooleanType, isNullable)
        //case "Date" => structFieldArray(i) = StructField(column, DateType, nullable = true)
        //case "Timestamp" => structFieldArray(i) = StructField(column, TimestampType, nullable = true)
      }
    }
    val schemaStructType = StructType(structFieldArray)

    // Generate `count` random JSON records matching the schema and read them back as a DataFrame.
    val spark = pec.get[SparkSession]()
    import spark.implicits._
    val rnd: Random = new Random()
    val df = spark.read
      .schema(schemaStructType)
      .json((1 to count).map { _ => compact(randomJson(rnd, schemaStructType)) }.toDS())
    out.write(df)
  }

  private def randomJson(rnd: Random, dataType: DataType): JValue = {

    val alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    dataType match {
      case v: DoubleType =>
        json4s.JDouble(rnd.nextDouble())
      case v: StringType =>
        JString((1 to 10).map(_ => alpha(rnd.nextInt(alpha.length))).mkString)
      case v: IntegerType =>
        JInt(rnd.nextInt(100))
      case v: LongType =>
        JInt(rnd.nextLong())
      case v: FloatType =>
        JDouble(rnd.nextFloat())
      case v: BooleanType =>
        JBool(rnd.nextBoolean())
      case v: ArrayType =>
        val size = rnd.nextInt(10)
        JArray(
          (0 to size).map(_ => randomJson(rnd, v.elementType)).toList
        )
      case v: StructType =>
        JObject(
          v.fields.flatMap { f =>
            // Randomly drop nullable fields so null handling is exercised as well.
            if (f.nullable && rnd.nextBoolean())
              None
            else
              Some(JField(f.name, randomJson(rnd, f.dataType)))
          }.toList
        )
    }
  }
}
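The data-generation trick in perform() — build random JSON strings that satisfy the schema and let Spark parse them — can be tried outside PiFlow. A minimal standalone sketch (plain Spark only; names and values are illustrative):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.types._

    val spark = SparkSession.builder().master("local[*]").appName("mockSketch").getOrCreate()
    import spark.implicits._

    val schema = StructType(Seq(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = false)
    ))

    // Hand-rolled JSON records standing in for MockData's randomJson output.
    val rows = (1 to 10).map(i => s"""{"name":"user$i","age":${20 + i}}""")
    val df = spark.read.schema(schema).json(rows.toDS())
    df.show()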
@@ -214,7 +214,8 @@ object ClassUtil {
         ("defaultValue" -> property.defaultValue) ~
         ("allowableValues" -> property.allowableValues) ~
         ("required" -> property.required.toString) ~
-        ("sensitive" -> property.sensitive.toString)) }) )
+        ("sensitive" -> property.sensitive.toString) ~
+        ("example" -> property.example)) }) )
     val jsonString = compactRender(json)
     jsonString

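Context for the hunk above: the ~ operator comes from the json4s JSON DSL and chains key/value pairs into a single JObject, so appending ("example" -> property.example) adds one more field to each stop property in the rendered JSON. A small standalone sketch of the pattern (field values here are illustrative, not the full set ClassUtil emits):

    import org.json4s.JsonDSL._
    import org.json4s.jackson.JsonMethods._

    val propertyJson =
      ("name" -> "count") ~
      ("required" -> "true") ~
      ("sensitive" -> "false") ~
      ("example" -> "10")

    // prints {"name":"count","required":"true","sensitive":"false","example":"10"}
    println(compact(render(propertyJson)))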
@@ -0,0 +1,58 @@
package cn.piflow.bundle.common

import java.net.InetAddress

import cn.piflow.Runner
import cn.piflow.conf.bean.FlowBean
import cn.piflow.conf.util.{FileUtil, OptionUtil}
import cn.piflow.util.{PropertyUtil, ServerIpUtil}
import org.apache.spark.sql.SparkSession
import org.h2.tools.Server
import org.junit.Test

import scala.util.parsing.json.JSON

class MockDataTest {

  @Test
  def testFlow(): Unit = {

    //parse flow json
    val file = "src/main/resources/flow/common/mockData.json"
    val flowJsonStr = FileUtil.fileReader(file)
    val map = OptionUtil.getAny(JSON.parseFull(flowJsonStr)).asInstanceOf[Map[String, Any]]
    println(map)

    //create flow
    val flowBean = FlowBean(map)
    val flow = flowBean.constructFlow()

    //record the server ip and start the H2 TCP server used for debug/monitor data
    val ip = InetAddress.getLocalHost.getHostAddress
    cn.piflow.util.FileUtil.writeFile("server.ip=" + ip, ServerIpUtil.getServerIpFile())
    val h2Server = Server.createTcpServer("-tcp", "-tcpAllowOthers", "-tcpPort", "50001").start()

    //execute flow
    val spark = SparkSession.builder()
      .master("local[12]")
      .appName("hive")
      .config("spark.driver.memory", "4g")
      .config("spark.executor.memory", "8g")
      .config("spark.cores.max", "8")
      .config("hive.metastore.uris", PropertyUtil.getPropertyValue("hive.metastore.uris"))
      .enableHiveSupport()
      .getOrCreate()

    val process = Runner.create()
      .bind(classOf[SparkSession].getName, spark)
      .bind("checkpoint.path", "")
      .bind("debug.path", "")
      .start(flow)

    process.awaitTermination()
    val pid = process.pid()
    println("flow process pid: " + pid)
    spark.close()
  }

}
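If only the MockData flow itself is of interest, the Hive configuration in the test above may not be strictly required; a trimmed-down sketch follows. Whether the H2 debug server and the server.ip file can also be dropped depends on how the PiFlow Runner is configured, so keep those steps from the test if flow execution stalls.

    import cn.piflow.Runner
    import cn.piflow.conf.bean.FlowBean
    import cn.piflow.conf.util.{FileUtil, OptionUtil}
    import org.apache.spark.sql.SparkSession

    import scala.util.parsing.json.JSON

    val flowJsonStr = FileUtil.fileReader("src/main/resources/flow/common/mockData.json")
    val map = OptionUtil.getAny(JSON.parseFull(flowJsonStr)).asInstanceOf[Map[String, Any]]
    val flow = FlowBean(map).constructFlow()

    // Local session without Hive support; MockData itself only needs a SparkSession.
    val spark = SparkSession.builder().master("local[*]").appName("mockDataLocal").getOrCreate()

    val process = Runner.create()
      .bind(classOf[SparkSession].getName, spark)
      .bind("checkpoint.path", "")
      .bind("debug.path", "")
      .start(flow)

    process.awaitTermination()
    spark.close()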
@@ -8,7 +8,7 @@ object HTTPClientGetStopInfo {

   def main(args: Array[String]): Unit = {

-    val url = "http://10.0.86.98:8001/stop/info?bundle=cn.piflow.bundle.jdbc.JdbcWrite"
+    val url = "http://10.0.85.83:8001/stop/info?bundle=cn.piflow.bundle.csv.CsvParser"
     val client = HttpClients.createDefault()
     val getFlowInfo:HttpGet = new HttpGet(url)

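To actually issue the GET request constructed in the hunk above, the usual Apache HttpClient pattern applies; a sketch (only the client and HttpGet construction appear in the diff, so the response handling here is assumed):

    import org.apache.http.client.methods.HttpGet
    import org.apache.http.impl.client.HttpClients
    import org.apache.http.util.EntityUtils

    val url = "http://10.0.85.83:8001/stop/info?bundle=cn.piflow.bundle.csv.CsvParser"
    val client = HttpClients.createDefault()
    val getStopInfo = new HttpGet(url)

    // Execute the request and print the stop-info JSON returned by the PiFlow server.
    val response = client.execute(getStopInfo)
    val body = EntityUtils.toString(response.getEntity, "UTF-8")
    println(body)
    client.close()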