![](https://github.com/cas-bigdatalab/piflow/blob/master/doc/piflow.png)

**Piflow** is an easy-to-use, powerful big data pipeline system.

## Table of Contents

…

* Git Client (used during build process by 'bower' plugin)

## Getting Started

To Build:

`mvn clean package -Dmaven.test.skip=true`

A successful build ends with output like:

    [INFO] Replacing original artifact with shaded artifact.
    [INFO] Replacing /opt/project/piflow/piflow-server/target/piflow-server-0.9.jar with /opt/project/piflow/piflow-server/target/piflow-server-0.9-shaded.jar
    [INFO] ------------------------------------------------------------------------
    [INFO] Reactor Summary:
    [INFO]
    [INFO] piflow-project ..................................... SUCCESS [ 4.602 s]
    [INFO] piflow-core ........................................ SUCCESS [ 56.533 s]
    [INFO] piflow-bundle ...................................... SUCCESS [02:15 min]
    [INFO] piflow-server ...................................... SUCCESS [03:01 min]
    [INFO] ------------------------------------------------------------------------
    [INFO] BUILD SUCCESS
    [INFO] ------------------------------------------------------------------------
    [INFO] Total time: 06:18 min
    [INFO] Finished at: 2018-12-24T16:54:16+08:00
    [INFO] Final Memory: 41M/812M
    [INFO] ------------------------------------------------------------------------
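As a sketch, the built server jar can then be copied to the location that `config.properties` points at via `piflow.bundle` (the `/opt/piflowServer/` path below is taken from the config example in the next section and is only an assumption; adjust it to your environment):

```shell
# illustrative only: place the built server jar where piflow.bundle expects it
mkdir -p /opt/piflowServer
cp piflow-server/target/piflow-server-0.9.jar /opt/piflowServer/
```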
To Run Piflow Server:

- configure config.properties:

        # server ip and port
        server.ip=10.0.86.191
        server.port=8002
        h2.port=50002

        # spark and yarn config
        spark.master=yarn
        spark.deploy.mode=cluster
        yarn.resourcemanager.hostname=10.0.86.191
        yarn.resourcemanager.address=10.0.86.191:8032
        yarn.access.namenode=hdfs://10.0.86.191:9000
        yarn.stagingDir=hdfs://10.0.86.191:9000/tmp/
        yarn.jars=hdfs://10.0.86.191:9000/user/spark/share/lib/*.jar
        yarn.url=http://10.0.86.191:8088/ws/v1/cluster/apps/

        # hive config
        hive.metastore.uris=thrift://10.0.86.191:9083

        # piflow jar path
        piflow.bundle=/opt/piflowServer/piflow-server-0.9.jar

        # checkpoint hdfs path
        checkpoint.path=hdfs://10.0.86.89:9000/piflow/checkpoints/

- you can run the piflow server in IntelliJ:
  - the main class is cn.piflow.api.Main
  - remember to set `SPARK_HOME`
- or you can run the piflow server from a packaged release:
  - download piflowServer: ***
  - edit config.properties
  - run start.sh (a minimal manual launch sketch follows this list)

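A minimal manual launch sketch, assuming Spark is installed under `/opt/spark`, the jar and `config.properties` live in `/opt/piflowServer/`, and the server picks up `config.properties` from the working directory; the bundled start.sh may do more than this:

```shell
# illustrative manual launch, not the official start.sh
export SPARK_HOME=/opt/spark                         # assumption: local Spark installation path
cd /opt/piflowServer                                 # assumption: directory holding the jar and config.properties
java -cp piflow-server-0.9.jar cn.piflow.api.Main    # main class named above
```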
To Run Piflow Web:

- todo

To Use:

- command line
- flow config example:

        {
          "flow":{
            "name":"test",
            "uuid":"1234",
            "checkpoint":"Merge",
            "stops":[
              {
                "uuid":"1111",
                "name":"XmlParser",
                "bundle":"cn.piflow.bundle.xml.XmlParser",
                "properties":{
                  "xmlpath":"hdfs://10.0.86.89:9000/xjzhu/dblp.mini.xml",
                  "rowTag":"phdthesis"
                }
              },
              {
                "uuid":"2222",
                "name":"SelectField",
                "bundle":"cn.piflow.bundle.common.SelectField",
                "properties":{
                  "schema":"title,author,pages"
                }
              },
              {
                "uuid":"3333",
                "name":"PutHiveStreaming",
                "bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
                "properties":{
                  "database":"sparktest",
                  "table":"dblp_phdthesis"
                }
              },
              {
                "uuid":"4444",
                "name":"CsvParser",
                "bundle":"cn.piflow.bundle.csv.CsvParser",
                "properties":{
                  "csvPath":"hdfs://10.0.86.89:9000/xjzhu/phdthesis.csv",
                  "header":"false",
                  "delimiter":",",
                  "schema":"title,author,pages"
                }
              },
              {
                "uuid":"555",
                "name":"Merge",
                "bundle":"cn.piflow.bundle.common.Merge",
                "properties":{
                  "inports":"data1,data2"
                }
              },
              {
                "uuid":"666",
                "name":"Fork",
                "bundle":"cn.piflow.bundle.common.Fork",
                "properties":{
                  "outports":"out1,out2,out3"
                }
              },
              {
                "uuid":"777",
                "name":"JsonSave",
                "bundle":"cn.piflow.bundle.json.JsonSave",
                "properties":{
                  "jsonSavePath":"hdfs://10.0.86.89:9000/xjzhu/phdthesis.json"
                }
              },
              {
                "uuid":"888",
                "name":"CsvSave",
                "bundle":"cn.piflow.bundle.csv.CsvSave",
                "properties":{
                  "csvSavePath":"hdfs://10.0.86.89:9000/xjzhu/phdthesis_result.csv",
                  "header":"true",
                  "delimiter":","
                }
              }
            ],
            "paths":[
              {
                "from":"XmlParser",
                "outport":"",
                "inport":"",
                "to":"SelectField"
              },
              {
                "from":"SelectField",
                "outport":"",
                "inport":"data1",
                "to":"Merge"
              },
              {
                "from":"CsvParser",
                "outport":"",
                "inport":"data2",
                "to":"Merge"
              },
              {
                "from":"Merge",
                "outport":"",
                "inport":"",
                "to":"Fork"
              },
              {
                "from":"Fork",
                "outport":"out1",
                "inport":"",
                "to":"PutHiveStreaming"
              },
              {
                "from":"Fork",
                "outport":"out2",
                "inport":"",
                "to":"JsonSave"
              },
              {
                "from":"Fork",
                "outport":"out3",
                "inport":"",
                "to":"CsvSave"
              }
            ]
          }
        }

- start the flow with `curl -X POST http://10.0.86.191:8002/flow/start -H "Content-type: application/json" -d 'this is your flow json'` (a complete example follows this list)
- piflow web

  ![](https://github.com/cas-bigdatalab/piflow/blob/master/doc/piflow_web.png)

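For example, if the flow definition above is saved as a local file named `flow.json` (a hypothetical file name), it can be submitted to the server address configured in `config.properties`:

```shell
# submit the flow definition to the running piflow server
curl -X POST http://10.0.86.191:8002/flow/start \
     -H "Content-type: application/json" \
     -d @flow.json
```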