Merge remote-tracking branch 'origin/master'

This commit is contained in:
yanggang 2018-11-26 15:43:02 +08:00
commit 7921775508
4 changed files with 839 additions and 2 deletions

View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module version="4">
<component name="FacetManager">
<facet type="Osmorc" name="OSGi">
<configuration manifestGenerationMode="Manually" manifestLocation="lib/commons-pool2-2.4.2.jar/META-INF/MANIFEST.MF" jarfileLocation="piflow-bundle.jar" outputPathType="CompilerOutputPath" bndFileLocation="" bundlorFileLocation="" bundleActivator="" bundleSymbolicName="" bundleVersion="1.0.0" ignoreFilePattern="" useProjectDefaultManifestFileLocation="false" alwaysRebuildBundleJAR="false" doNotSynchronizeWithMaven="false">
<additionalProperties />
<additionalJARContents />
</configuration>
</facet>
</component>
</module>

View File

@ -0,0 +1,413 @@
{
"flow":{
"name":"test",
"uuid":"1234",
"checkpoint":"Merge",
"stops":[
{
"uuid":"1111",
"name":"UnzipFilesOnHDFS",
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
"properties":{
"filePath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml.gz",
"fileType":"gz",
"unzipPath":"hdfs://10.0.86.89:9000/yqd/dblp_1/"
}
},
{
"uuid":"2222",
"name":"Fork",
"bundle":"cn.piflow.bundle.common.Fork",
"properties":{
"outports":"out1,out2,out3,out4,out5,out6,out7,out8"
}
},
{
"uuid":"3333",
"name":"XmlParser1",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"phdthesis",
"schema":""
}
},
{
"uuid":"4444",
"name":"ConvertSchema1",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"5555",
"name":"PutHiveStreaming1",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_phdthesis_all_1"
}
},
{
"uuid":"6666",
"name":"XmlParser2",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"mastersthesis",
"schema":""
}
},
{
"uuid":"7777",
"name":"ConvertSchema2",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"8888",
"name":"PutHiveStreaming2",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_mastersthesis_all_1"
}
},
{
"uuid":"9999",
"name":"XmlParser3",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"www",
"schema":""
}
},
{
"uuid":"10101010",
"name":"ConvertSchema3",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"11111111",
"name":"PutHiveStreaming3",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_www_all_1"
}
},
{
"uuid":"12121212",
"name":"XmlParser4",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"incollection",
"schema":""
}
},
{
"uuid":"13131313",
"name":"ConvertSchema4",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"14141414",
"name":"PutHiveStreaming4",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_incollection_all_1"
}
},
{
"uuid":"15151515",
"name":"XmlParser5",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"book",
"schema":""
}
},
{
"uuid":"16161616",
"name":"ConvertSchema5",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"17171717",
"name":"PutHiveStreaming5",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_book_all_1"
}
},
{
"uuid":"18181818",
"name":"XmlParser6",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"proceedings",
"schema":""
}
},
{
"uuid":"19191919",
"name":"ConvertSchema6",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"20202020",
"name":"PutHiveStreaming6",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_proceedings_all_1"
}
},
{
"uuid":"21212121",
"name":"XmlParser7",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"inproceedings",
"schema":""
}
},
{
"uuid":"22222222",
"name":"ConvertSchema7",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"23232323",
"name":"PutHiveStreaming7",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_inproceedings_all_1"
}
},
{
"uuid":"24242424",
"name":"XmlParser8",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"article",
"schema":""
}
},
{
"uuid":"25252525",
"name":"ConvertSchema8",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"26262626",
"name":"PutHiveStreaming8",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_article_all_1"
}
}
],
"paths":[
{
"from":"UnzipFilesOnHDFS",
"outport":"",
"inport":"",
"to":"Fork"
},{
"from":"Fork",
"outport":"out1",
"inport":"",
"to":"XmlParser1"
},{
"from":"XmlParser1",
"outport":"",
"inport":"",
"to":"ConvertSchema1"
},{
"from":"ConvertSchema1",
"outport":"",
"inport":"",
"to":"PutHiveStreaming1"
},{
"from":"Fork",
"outport":"out2",
"inport":"",
"to":"XmlParser2"
},{
"from":"XmlParser2",
"outport":"",
"inport":"",
"to":"ConvertSchema2"
},{
"from":"ConvertSchema2",
"outport":"",
"inport":"",
"to":"PutHiveStreaming2"
},{
"from":"Fork",
"outport":"out3",
"inport":"",
"to":"XmlParser3"
},{
"from":"XmlParser3",
"outport":"",
"inport":"",
"to":"ConvertSchema3"
},{
"from":"ConvertSchema3",
"outport":"",
"inport":"",
"to":"PutHiveStreaming3"
},{
"from":"Fork",
"outport":"out4",
"inport":"",
"to":"XmlParser4"
},{
"from":"XmlParser4",
"outport":"",
"inport":"",
"to":"ConvertSchema4"
},{
"from":"ConvertSchema4",
"outport":"",
"inport":"",
"to":"PutHiveStreaming4"
},{
"from":"Fork",
"outport":"out5",
"inport":"",
"to":"XmlParser5"
},{
"from":"XmlParser5",
"outport":"",
"inport":"",
"to":"ConvertSchema5"
},{
"from":"ConvertSchema5",
"outport":"",
"inport":"",
"to":"PutHiveStreaming5"
},{
"from":"Fork",
"outport":"out6",
"inport":"",
"to":"XmlParser6"
},{
"from":"XmlParser6",
"outport":"",
"inport":"",
"to":"ConvertSchema6"
},{
"from":"ConvertSchema6",
"outport":"",
"inport":"",
"to":"PutHiveStreaming6"
},{
"from":"Fork",
"outport":"out7",
"inport":"",
"to":"XmlParser7"
},{
"from":"XmlParser7",
"outport":"",
"inport":"",
"to":"ConvertSchema7"
},{
"from":"ConvertSchema7",
"outport":"",
"inport":"",
"to":"PutHiveStreaming7"
},{
"from":"Fork",
"outport":"out8",
"inport":"",
"to":"XmlParser8"
},{
"from":"XmlParser8",
"outport":"",
"inport":"",
"to":"ConvertSchema8"
},{
"from":"ConvertSchema8",
"outport":"",
"inport":"",
"to":"PutHiveStreaming8"
}
]
}
}

View File

@ -0,0 +1,413 @@
{
"flow":{
"name":"test",
"uuid":"1234",
"checkpoint":"Merge",
"stops":[
{
"uuid":"1111",
"name":"UnzipFilesOnHDFS",
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
"properties":{
"filePath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml.gz",
"fileType":"gz",
"unzipPath":"hdfs://10.0.86.89:9000/yqd/dblp_1/"
}
},
{
"uuid":"2222",
"name":"Fork",
"bundle":"cn.piflow.bundle.common.Fork",
"properties":{
"outports":"out1,out2,out3,out4,out5,out6,out7,out8"
}
},
{
"uuid":"3333",
"name":"XmlParser1",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"phdthesis",
"schema":""
}
},
{
"uuid":"4444",
"name":"ConvertSchema1",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"5555",
"name":"PutHiveStreaming1",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_phdthesis_all_1"
}
},
{
"uuid":"6666",
"name":"XmlParser2",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"mastersthesis",
"schema":""
}
},
{
"uuid":"7777",
"name":"ConvertSchema2",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"8888",
"name":"PutHiveStreaming2",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_mastersthesis_all_1"
}
},
{
"uuid":"9999",
"name":"XmlParser3",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"www",
"schema":""
}
},
{
"uuid":"10101010",
"name":"ConvertSchema3",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"11111111",
"name":"PutHiveStreaming3",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_www_all_1"
}
},
{
"uuid":"12121212",
"name":"XmlParser4",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"incollection",
"schema":""
}
},
{
"uuid":"13131313",
"name":"ConvertSchema4",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"14141414",
"name":"PutHiveStreaming4",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_incollection_all_1"
}
},
{
"uuid":"15151515",
"name":"XmlParser5",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"book",
"schema":""
}
},
{
"uuid":"16161616",
"name":"ConvertSchema5",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"17171717",
"name":"PutHiveStreaming5",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_book_all_1"
}
},
{
"uuid":"18181818",
"name":"XmlParser6",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"proceedings",
"schema":""
}
},
{
"uuid":"19191919",
"name":"ConvertSchema6",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"20202020",
"name":"PutHiveStreaming6",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_proceedings_all_1"
}
},
{
"uuid":"21212121",
"name":"XmlParser7",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"inproceedings",
"schema":""
}
},
{
"uuid":"22222222",
"name":"ConvertSchema7",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"23232323",
"name":"PutHiveStreaming7",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_inproceedings_all_1"
}
},
{
"uuid":"24242424",
"name":"XmlParser8",
"bundle":"cn.piflow.bundle.xml.XmlParser",
"properties":{
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
"rowTag":"article",
"schema":""
}
},
{
"uuid":"25252525",
"name":"ConvertSchema8",
"bundle":"cn.piflow.bundle.common.ConvertSchema",
"properties":{
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
}
},
{
"uuid":"26262626",
"name":"PutHiveStreaming8",
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
"properties":{
"database":"sparktest",
"table":"yqd_dblp_article_all_1"
}
}
],
"paths":[
{
"from":"UnzipFilesOnHDFS",
"outport":"",
"inport":"",
"to":"Fork"
},{
"from":"Fork",
"outport":"out1",
"inport":"",
"to":"XmlParser1"
},{
"from":"XmlParser1",
"outport":"",
"inport":"",
"to":"ConvertSchema1"
},{
"from":"ConvertSchema1",
"outport":"",
"inport":"",
"to":"PutHiveStreaming1"
},{
"from":"Fork",
"outport":"out2",
"inport":"",
"to":"XmlParser2"
},{
"from":"XmlParser2",
"outport":"",
"inport":"",
"to":"ConvertSchema2"
},{
"from":"ConvertSchema2",
"outport":"",
"inport":"",
"to":"PutHiveStreaming2"
},{
"from":"Fork",
"outport":"out3",
"inport":"",
"to":"XmlParser3"
},{
"from":"XmlParser3",
"outport":"",
"inport":"",
"to":"ConvertSchema3"
},{
"from":"ConvertSchema3",
"outport":"",
"inport":"",
"to":"PutHiveStreaming3"
},{
"from":"Fork",
"outport":"out4",
"inport":"",
"to":"XmlParser4"
},{
"from":"XmlParser4",
"outport":"",
"inport":"",
"to":"ConvertSchema4"
},{
"from":"ConvertSchema4",
"outport":"",
"inport":"",
"to":"PutHiveStreaming4"
},{
"from":"Fork",
"outport":"out5",
"inport":"",
"to":"XmlParser5"
},{
"from":"XmlParser5",
"outport":"",
"inport":"",
"to":"ConvertSchema5"
},{
"from":"ConvertSchema5",
"outport":"",
"inport":"",
"to":"PutHiveStreaming5"
},{
"from":"Fork",
"outport":"out6",
"inport":"",
"to":"XmlParser6"
},{
"from":"XmlParser6",
"outport":"",
"inport":"",
"to":"ConvertSchema6"
},{
"from":"ConvertSchema6",
"outport":"",
"inport":"",
"to":"PutHiveStreaming6"
},{
"from":"Fork",
"outport":"out7",
"inport":"",
"to":"XmlParser7"
},{
"from":"XmlParser7",
"outport":"",
"inport":"",
"to":"ConvertSchema7"
},{
"from":"ConvertSchema7",
"outport":"",
"inport":"",
"to":"PutHiveStreaming7"
},{
"from":"Fork",
"outport":"out8",
"inport":"",
"to":"XmlParser8"
},{
"from":"XmlParser8",
"outport":"",
"inport":"",
"to":"ConvertSchema8"
},{
"from":"ConvertSchema8",
"outport":"",
"inport":"",
"to":"PutHiveStreaming8"
}
]
}
}

View File

@ -16,14 +16,14 @@ class ConvertSchema extends ConfigurableStop {
var schema:String = _
def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
val df = in.read()
var df = in.read()
//oldField1->newField1, oldField2->newField2
val field = schema.split(",")
field.foreach(f => {
val old_new: Array[String] = f.split("->")
df.withColumnRenamed(old_new(0),old_new(1))
df = df.withColumnRenamed(old_new(0),old_new(1))
})
println("###########################")