forked from opensci/piflow
Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
7921775508
|
@ -0,0 +1,11 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module version="4">
|
||||
<component name="FacetManager">
|
||||
<facet type="Osmorc" name="OSGi">
|
||||
<configuration manifestGenerationMode="Manually" manifestLocation="lib/commons-pool2-2.4.2.jar/META-INF/MANIFEST.MF" jarfileLocation="piflow-bundle.jar" outputPathType="CompilerOutputPath" bndFileLocation="" bundlorFileLocation="" bundleActivator="" bundleSymbolicName="" bundleVersion="1.0.0" ignoreFilePattern="" useProjectDefaultManifestFileLocation="false" alwaysRebuildBundleJAR="false" doNotSynchronizeWithMaven="false">
|
||||
<additionalProperties />
|
||||
<additionalJARContents />
|
||||
</configuration>
|
||||
</facet>
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,413 @@
|
|||
{
|
||||
"flow":{
|
||||
"name":"test",
|
||||
"uuid":"1234",
|
||||
"checkpoint":"Merge",
|
||||
"stops":[
|
||||
|
||||
{
|
||||
"uuid":"1111",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"filePath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml.gz",
|
||||
"fileType":"gz",
|
||||
"unzipPath":"hdfs://10.0.86.89:9000/yqd/dblp_1/"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"2222",
|
||||
"name":"Fork",
|
||||
"bundle":"cn.piflow.bundle.common.Fork",
|
||||
"properties":{
|
||||
"outports":"out1,out2,out3,out4,out5,out6,out7,out8"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"XmlParser1",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"phdthesis",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"4444",
|
||||
"name":"ConvertSchema1",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"5555",
|
||||
"name":"PutHiveStreaming1",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_phdthesis_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"6666",
|
||||
"name":"XmlParser2",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"mastersthesis",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"7777",
|
||||
"name":"ConvertSchema2",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"8888",
|
||||
"name":"PutHiveStreaming2",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_mastersthesis_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"9999",
|
||||
"name":"XmlParser3",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"www",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"10101010",
|
||||
"name":"ConvertSchema3",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"11111111",
|
||||
"name":"PutHiveStreaming3",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_www_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"12121212",
|
||||
"name":"XmlParser4",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"incollection",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"13131313",
|
||||
"name":"ConvertSchema4",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"14141414",
|
||||
"name":"PutHiveStreaming4",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_incollection_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"15151515",
|
||||
"name":"XmlParser5",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"book",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"16161616",
|
||||
"name":"ConvertSchema5",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"17171717",
|
||||
"name":"PutHiveStreaming5",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_book_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"18181818",
|
||||
"name":"XmlParser6",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"proceedings",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"19191919",
|
||||
"name":"ConvertSchema6",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"20202020",
|
||||
"name":"PutHiveStreaming6",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_proceedings_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"21212121",
|
||||
"name":"XmlParser7",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"inproceedings",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"22222222",
|
||||
"name":"ConvertSchema7",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"23232323",
|
||||
"name":"PutHiveStreaming7",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_inproceedings_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"24242424",
|
||||
"name":"XmlParser8",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"article",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"25252525",
|
||||
"name":"ConvertSchema8",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"26262626",
|
||||
"name":"PutHiveStreaming8",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_article_all_1"
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
],
|
||||
"paths":[
|
||||
{
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"Fork"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out1",
|
||||
"inport":"",
|
||||
"to":"XmlParser1"
|
||||
},{
|
||||
"from":"XmlParser1",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema1"
|
||||
},{
|
||||
"from":"ConvertSchema1",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming1"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out2",
|
||||
"inport":"",
|
||||
"to":"XmlParser2"
|
||||
},{
|
||||
"from":"XmlParser2",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema2"
|
||||
},{
|
||||
"from":"ConvertSchema2",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming2"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out3",
|
||||
"inport":"",
|
||||
"to":"XmlParser3"
|
||||
},{
|
||||
"from":"XmlParser3",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema3"
|
||||
},{
|
||||
"from":"ConvertSchema3",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming3"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out4",
|
||||
"inport":"",
|
||||
"to":"XmlParser4"
|
||||
},{
|
||||
"from":"XmlParser4",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema4"
|
||||
},{
|
||||
"from":"ConvertSchema4",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming4"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out5",
|
||||
"inport":"",
|
||||
"to":"XmlParser5"
|
||||
},{
|
||||
"from":"XmlParser5",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema5"
|
||||
},{
|
||||
"from":"ConvertSchema5",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming5"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out6",
|
||||
"inport":"",
|
||||
"to":"XmlParser6"
|
||||
},{
|
||||
"from":"XmlParser6",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema6"
|
||||
},{
|
||||
"from":"ConvertSchema6",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming6"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out7",
|
||||
"inport":"",
|
||||
"to":"XmlParser7"
|
||||
},{
|
||||
"from":"XmlParser7",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema7"
|
||||
},{
|
||||
"from":"ConvertSchema7",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming7"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out8",
|
||||
"inport":"",
|
||||
"to":"XmlParser8"
|
||||
},{
|
||||
"from":"XmlParser8",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema8"
|
||||
},{
|
||||
"from":"ConvertSchema8",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming8"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,413 @@
|
|||
{
|
||||
"flow":{
|
||||
"name":"test",
|
||||
"uuid":"1234",
|
||||
"checkpoint":"Merge",
|
||||
"stops":[
|
||||
|
||||
{
|
||||
"uuid":"1111",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"filePath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml.gz",
|
||||
"fileType":"gz",
|
||||
"unzipPath":"hdfs://10.0.86.89:9000/yqd/dblp_1/"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"2222",
|
||||
"name":"Fork",
|
||||
"bundle":"cn.piflow.bundle.common.Fork",
|
||||
"properties":{
|
||||
"outports":"out1,out2,out3,out4,out5,out6,out7,out8"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"XmlParser1",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"phdthesis",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"4444",
|
||||
"name":"ConvertSchema1",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"5555",
|
||||
"name":"PutHiveStreaming1",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_phdthesis_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"6666",
|
||||
"name":"XmlParser2",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"mastersthesis",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"7777",
|
||||
"name":"ConvertSchema2",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"8888",
|
||||
"name":"PutHiveStreaming2",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_mastersthesis_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"9999",
|
||||
"name":"XmlParser3",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"www",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"10101010",
|
||||
"name":"ConvertSchema3",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"11111111",
|
||||
"name":"PutHiveStreaming3",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_www_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"12121212",
|
||||
"name":"XmlParser4",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"incollection",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"13131313",
|
||||
"name":"ConvertSchema4",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"14141414",
|
||||
"name":"PutHiveStreaming4",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_incollection_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"15151515",
|
||||
"name":"XmlParser5",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"book",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"16161616",
|
||||
"name":"ConvertSchema5",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"17171717",
|
||||
"name":"PutHiveStreaming5",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_book_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"18181818",
|
||||
"name":"XmlParser6",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"proceedings",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"19191919",
|
||||
"name":"ConvertSchema6",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"20202020",
|
||||
"name":"PutHiveStreaming6",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_proceedings_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"21212121",
|
||||
"name":"XmlParser7",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"inproceedings",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"22222222",
|
||||
"name":"ConvertSchema7",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"23232323",
|
||||
"name":"PutHiveStreaming7",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_inproceedings_all_1"
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"24242424",
|
||||
"name":"XmlParser8",
|
||||
"bundle":"cn.piflow.bundle.xml.XmlParser",
|
||||
"properties":{
|
||||
"xmlpath":"hdfs://10.0.86.89:9000/yqd/dblp_1/dblp.xml",
|
||||
"rowTag":"article",
|
||||
"schema":""
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"uuid":"25252525",
|
||||
"name":"ConvertSchema8",
|
||||
"bundle":"cn.piflow.bundle.common.ConvertSchema",
|
||||
"properties":{
|
||||
"schema":"_corrupt_record->corrupt_record,_key->key,_mdate->mdate,_publtype->publtype"
|
||||
}
|
||||
|
||||
},
|
||||
{
|
||||
"uuid":"26262626",
|
||||
"name":"PutHiveStreaming8",
|
||||
"bundle":"cn.piflow.bundle.hive.PutHiveStreaming",
|
||||
"properties":{
|
||||
"database":"sparktest",
|
||||
"table":"yqd_dblp_article_all_1"
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
],
|
||||
"paths":[
|
||||
{
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"Fork"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out1",
|
||||
"inport":"",
|
||||
"to":"XmlParser1"
|
||||
},{
|
||||
"from":"XmlParser1",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema1"
|
||||
},{
|
||||
"from":"ConvertSchema1",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming1"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out2",
|
||||
"inport":"",
|
||||
"to":"XmlParser2"
|
||||
},{
|
||||
"from":"XmlParser2",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema2"
|
||||
},{
|
||||
"from":"ConvertSchema2",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming2"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out3",
|
||||
"inport":"",
|
||||
"to":"XmlParser3"
|
||||
},{
|
||||
"from":"XmlParser3",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema3"
|
||||
},{
|
||||
"from":"ConvertSchema3",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming3"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out4",
|
||||
"inport":"",
|
||||
"to":"XmlParser4"
|
||||
},{
|
||||
"from":"XmlParser4",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema4"
|
||||
},{
|
||||
"from":"ConvertSchema4",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming4"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out5",
|
||||
"inport":"",
|
||||
"to":"XmlParser5"
|
||||
},{
|
||||
"from":"XmlParser5",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema5"
|
||||
},{
|
||||
"from":"ConvertSchema5",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming5"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out6",
|
||||
"inport":"",
|
||||
"to":"XmlParser6"
|
||||
},{
|
||||
"from":"XmlParser6",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema6"
|
||||
},{
|
||||
"from":"ConvertSchema6",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming6"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out7",
|
||||
"inport":"",
|
||||
"to":"XmlParser7"
|
||||
},{
|
||||
"from":"XmlParser7",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema7"
|
||||
},{
|
||||
"from":"ConvertSchema7",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming7"
|
||||
},{
|
||||
"from":"Fork",
|
||||
"outport":"out8",
|
||||
"inport":"",
|
||||
"to":"XmlParser8"
|
||||
},{
|
||||
"from":"XmlParser8",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ConvertSchema8"
|
||||
},{
|
||||
"from":"ConvertSchema8",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutHiveStreaming8"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -16,14 +16,14 @@ class ConvertSchema extends ConfigurableStop {
|
|||
var schema:String = _
|
||||
|
||||
def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
val df = in.read()
|
||||
var df = in.read()
|
||||
|
||||
//oldField1->newField1, oldField2->newField2
|
||||
val field = schema.split(",")
|
||||
|
||||
field.foreach(f => {
|
||||
val old_new: Array[String] = f.split("->")
|
||||
df.withColumnRenamed(old_new(0),old_new(1))
|
||||
df = df.withColumnRenamed(old_new(0),old_new(1))
|
||||
})
|
||||
|
||||
println("###########################")
|
||||
|
|
Loading…
Reference in New Issue