Merge remote-tracking branch 'origin/master'
@@ -6,28 +6,41 @@
{
"uuid":"1111",
"name":"SelectFilesByName",
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
"properties":{
"HDFSUrl":"hdfs://10.0.88.70:9000",
"HDFSPath":"/yqd/weishengwu/PDB/",
"HDFSPath":"/yqd/ftp/pdb/07/",
"selectionConditions":".*.ent.gz"
}
},{
"uuid":"2222",
"name":"UnzipFilesOnHDFS",
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
"properties":{
"isCustomize":"false",
"hdfsUrl":"hdfs://10.0.88.70:9000",
"filePath":"/yqd/weishengwu/PDB/",
"savePath":""
"filePath":"",
"savePath":"/yqd/test/pdb/pdb1/"
}
},{
"uuid":"3333",
"name":"PDBParser",
"bundle":"cn.piflow.bundle.microorganism.PDBParser",
"bundle":"cn.piflow.bundle.microorganism.PDBData",
"properties":{
"cachePath":"/yqd/test/pdb/"
}
},
{
"uuid":"4444",
"name":"putEs",
"bundle":"cn.piflow.bundle.es.PutEs",
"properties":{
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
"es_port": "9200",
"es_index": "test_17",
"es_type": "test_17"
}

}
],
"paths":[

@@ -41,6 +54,11 @@
"outport":"",
"inport":"",
"to":"PDBParser"
},{
"from":"PDBParser",
"outport":"",
"inport":"",
"to":"putEs"
}
]
}

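Each stop above is configured entirely through its "properties" object; the Scala stops later in this diff read those values in setProperties via MapUtil.get. A minimal, hypothetical sketch of that wiring for the SelectFilesByName keys used in this flow (illustration only, not the full class in the repository):

import cn.piflow.conf.util.MapUtil

// Hypothetical, trimmed version of a stop's property wiring: the engine passes the
// JSON "properties" object as a Map and the stop pulls each value out by key.
class SelectFilesByNameSketch {
  var HDFSUrl: String = _
  var HDFSPath: String = _
  var selectionConditions: String = _

  def setProperties(map: Map[String, Any]): Unit = {
    HDFSUrl = MapUtil.get(map, key = "HDFSUrl").asInstanceOf[String]
    HDFSPath = MapUtil.get(map, key = "HDFSPath").asInstanceOf[String]
    selectionConditions = MapUtil.get(map, key = "selectionConditions").asInstanceOf[String]
  }
}
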
@ -7,30 +7,30 @@
|
|||
{
|
||||
"uuid":"0000",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://master2.packone:8020",
|
||||
"HDFSPath":"/microo/",
|
||||
"HDFSPath":"/yg/microo/",
|
||||
"selectionConditions":".*ample_set.xml.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"hdfsUrl":"hdfs://master2.packone:8020",
|
||||
"savePath":"/microo/biosample/biosample/"
|
||||
"savePath":"/yg/microo/biosample/"
|
||||
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"2222",
|
||||
"name":"BioSampleParse",
|
||||
"bundle":"cn.piflow.bundle.microorganism.BioSampleParse",
|
||||
"bundle":"cn.piflow.bundle.microorganism.BioSample",
|
||||
"properties":{
|
||||
|
||||
"cachePath": "/yg/microoCache/"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -38,10 +38,10 @@
|
|||
"name":"putEs",
|
||||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes":"10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port":"9200",
|
||||
"es_index":"sample0122",
|
||||
"es_type":"sample0122"
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"es_port": "9200",
|
||||
"es_index": "test_11",
|
||||
"es_type": "test_11"
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -6,22 +6,19 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://master2.packone:8020",
|
||||
"HDFSPath":"/microo/biproject/",
|
||||
"HDFSPath":"/yg/microo/biproject/",
|
||||
"selectionConditions":"bioproject.xml"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"2222",
|
||||
"name":"BioProjetDataParse",
|
||||
"bundle":"cn.piflow.bundle.microorganism.BioProjetDataParse",
|
||||
"bundle":"cn.piflow.bundle.microorganism.BioProjetData",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "bioproject",
|
||||
"es_type": "bioprojecttest002"
|
||||
"cachePath": "/yg/microoCache/"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -30,9 +27,9 @@
|
|||
"bundle": "cn.piflow.bundle.es.PutEs",
|
||||
"properties": {
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "bioproject10",
|
||||
"es_type": "bioproject10"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_10",
|
||||
"es_type": "test_10"
|
||||
}
|
||||
}
|
||||
],
|
|
@@ -1,31 +0,0 @@
{
"flow":{
"name":"test",
"uuid":"1234",
"stops":[
{
"uuid":"1111",
"name":"LoadFromFtpToHDFS",
"bundle":"cn.piflow.bundle.ftp.LoadFromFtpToHDFS",
"properties":{
"url_str":"ftp.ebi.ac.uk",
"port":"",
"username":"",
"password":"",
"ftpFile":"/pub/databases/ena/sequence/release/con/rel_con_env_07_r138.dat.gz",
"HDFSUrl":"hdfs://10.0.88.70:9000",
"HDFSPath":"/yqd/weishengwu/embl/",
"isFile":"true"
}
}
],
"paths":[
{
"from":"",
"outport":"",
"inport":"",
"to":""
}
]
}
}

@ -7,29 +7,30 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://10.0.88.70:9000",
|
||||
"HDFSPath":"/yqd/weishengwu/embl",
|
||||
"selectionConditions":".*con_pro_02_r138.dat.gz,.*con_vrl_01_r138.dat.gz,.*pat_phg_01_r138.dat.gz"
|
||||
"HDFSPath":"/yqd/ftp/embl/",
|
||||
"selectionConditions":"rel_exp_con_pro_26_r138.dat.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS_1",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS_1",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"fileType":"gz",
|
||||
"unzipPath":""
|
||||
"hdfsUrl":"hdfs://master2.packone:8020",
|
||||
"savePath":"/yqd/test/embl/"
|
||||
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"EmblParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.EmblParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.EmblData",
|
||||
"properties":{
|
||||
"cachePath": "/yqd/test/embl/"
|
||||
}
|
||||
},{
|
||||
"uuid":"4444",
|
||||
|
@ -37,9 +38,9 @@
|
|||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "embl",
|
||||
"es_type": "embl"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_22",
|
||||
"es_type": "test_22"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
@ -48,10 +49,10 @@
|
|||
"from":"SelectFilesByName",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"UnzipFilesOnHDFS_1"
|
||||
"to":"UnzipFilesOnHDFS"
|
||||
},
|
||||
{
|
||||
"from":"UnzipFilesOnHDFS_1",
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"EmblParser"
|
|
@ -9,15 +9,26 @@
|
|||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://10.0.88.70:9000",
|
||||
"HDFSPath":"/yqd/ftp/ensembl_gff3/",
|
||||
"selectionConditions":".*.gff3"
|
||||
"HDFSPath":"/yqd/ftp/ensembl_gff3/aspergillus_niger/",
|
||||
"selectionConditions":".*.gff3.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"hdfsUrl":"hdfs://10.0.88.70:9000",
|
||||
"savePath":"/yqd/test/ensembl/"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"Ensembl_gff3Parser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.Ensembl_gff3Parser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.Ensembl",
|
||||
"properties":{
|
||||
|
||||
"cachePath":"/yqd/test/ensembl/"
|
||||
}
|
||||
},{
|
||||
"uuid":"4444",
|
||||
|
@ -25,9 +36,9 @@
|
|||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "yqd_ensembl_index",
|
||||
"es_type": "yqd_ensembl_type"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_21",
|
||||
"es_type": "test_21"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
@ -36,6 +47,11 @@
|
|||
"from":"SelectFilesByName",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"UnzipFilesOnHDFS"
|
||||
},{
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"Ensembl_gff3Parser"
|
||||
},{
|
||||
"from":"Ensembl_gff3Parser",
|
||||
|
|
|
@ -6,33 +6,30 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://master2.packone:8020",
|
||||
"HDFSPath":"/microo/genbank/",
|
||||
"selectionConditions":".*.seq.gz"
|
||||
"HDFSPath":"/yg/microo/genbank2/",
|
||||
"selectionConditions":"gbbct1.seq.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"hdfsUrl":"hdfs://master2.packone:8020",
|
||||
"savePath":"/microo/genbank/"
|
||||
"savePath":"/yg/microo/genbank/"
|
||||
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"GenBankParse",
|
||||
"bundle":"cn.piflow.bundle.microorganism.GenBankParse",
|
||||
"bundle":"cn.piflow.bundle.microorganism.GenBankData",
|
||||
"properties":{
|
||||
"es_nodes":"10.0.86.239",
|
||||
"port":"9200",
|
||||
"es_index":"genbank",
|
||||
"es_type":"data6"
|
||||
"cachePath": "/yg/microoCache/"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -41,9 +38,9 @@
|
|||
"bundle": "cn.piflow.bundle.es.PutEs",
|
||||
"properties": {
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "genbank",
|
||||
"es_type": "genbank1"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_12",
|
||||
"es_type": "test_12"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
|
|
@ -6,29 +6,43 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://10.0.88.70:9000",
|
||||
"HDFSPath":"/yqd/weishengwu/gene_w/",
|
||||
"selectionConditions":".*ene_info"
|
||||
"HDFSPath":"/yqd/ftp/gene/",
|
||||
"selectionConditions":"gene_info.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"3333",
|
||||
"name":"GeneParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.GeneParser",
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"hdfsUrl":"hdfs://10.0.88.70:9000",
|
||||
"savePath":"/yqd/test/gene/"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"Gene",
|
||||
"bundle":"cn.piflow.bundle.microorganism.Gene",
|
||||
"properties":{
|
||||
"cachePath":"/yqd/test/gene/"
|
||||
|
||||
}
|
||||
},{
|
||||
},
|
||||
{
|
||||
"uuid":"4444",
|
||||
"name":"PutEs",
|
||||
"name":"putEs",
|
||||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "gene_index",
|
||||
"es_type": "gene_type"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_16",
|
||||
"es_type": "test_16"
|
||||
}
|
||||
|
||||
}
|
||||
],
|
||||
"paths":[
|
||||
|
@ -36,12 +50,17 @@
|
|||
"from":"SelectFilesByName",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"GeneParser"
|
||||
"to":"UnzipFilesOnHDFS"
|
||||
},{
|
||||
"from":"GeneParser",
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutEs"
|
||||
"to":"Gene"
|
||||
},{
|
||||
"from":"Gene",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"putEs"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -6,18 +6,19 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://master2.packone:8020",
|
||||
"HDFSPath":"/microoAll/",
|
||||
"HDFSPath":"/yg/microo/go/",
|
||||
"selectionConditions":"go.obo"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid": "3333",
|
||||
"name": "GoDataParse",
|
||||
"bundle": "cn.piflow.bundle.microorganism.GoDataParse",
|
||||
"bundle": "cn.piflow.bundle.microorganism.GoData",
|
||||
"properties": {
|
||||
"cachePath": "/yg/microoCache/"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -26,9 +27,9 @@
|
|||
"bundle": "cn.piflow.bundle.es.PutEs",
|
||||
"properties": {
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "go",
|
||||
"es_type": "go"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_13",
|
||||
"es_type": "test_13"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
"bundle":"cn.piflow.bundle.http.FileDownHDFS",
|
||||
"properties":{
|
||||
"url_str":"https://gold.jgi.doe.gov/download?mode=site_excel",
|
||||
"savePath":"hdfs://master2.packone:8020/microo/golddata/gold.xlsx"
|
||||
"savePath":"hdfs://master2.packone:8020/yg/microo/golddata/gold.xlsx"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -17,26 +17,7 @@
|
|||
"name": "ExcelParser",
|
||||
"bundle": "cn.piflow.bundle.excel.ExcelParser",
|
||||
"properties": {
|
||||
"jaonSavePath":"hdfs://master2.packone:8020/microo/golddata/golddata.json"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid": "2222",
|
||||
"name": "GoldDataParse",
|
||||
"bundle": "cn.piflow.bundle.microorganism.GoldDataParse",
|
||||
"properties": {
|
||||
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid": "3333",
|
||||
"name": "putEs",
|
||||
"bundle": "cn.piflow.bundle.es.PutEs",
|
||||
"properties": {
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "golddata1",
|
||||
"es_type": "golddatadaa"
|
||||
"cachePath":"hdfs://master2.packone:8020/yg/microo/golddata/golddata.json"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
@ -46,18 +27,6 @@
|
|||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"ExcelParser"
|
||||
},
|
||||
{
|
||||
"from":"ExcelParser",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"GoldDataParse"
|
||||
},
|
||||
{
|
||||
"from":"GoldDataParse",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"putEs"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -6,29 +6,30 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://master2.packone:8020",
|
||||
"HDFSPath":"/microoAll/",
|
||||
"HDFSPath":"/yg/microo/interpro/",
|
||||
"selectionConditions":"interpro.xml.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"hdfsUrl":"hdfs://master2.packone:8020",
|
||||
"savePath":"/microoAll/inter/"
|
||||
"savePath":"/yg/microo/interpro/"
|
||||
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid": "3333",
|
||||
"name": "InterprodataParse",
|
||||
"bundle": "cn.piflow.bundle.microorganism.InterprodataParse",
|
||||
"bundle": "cn.piflow.bundle.microorganism.InterproData",
|
||||
"properties": {
|
||||
"cachePath": "/yg/microoCache/"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -37,9 +38,9 @@
|
|||
"bundle": "cn.piflow.bundle.es.PutEs",
|
||||
"properties": {
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "interpro",
|
||||
"es_type": "interpro"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_14",
|
||||
"es_type": "test_14"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
|
|
@ -15,9 +15,9 @@
|
|||
},{
|
||||
"uuid":"3333",
|
||||
"name":"MicrobeGenomeDataParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.MicrobeGenomeDataParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.MicrobeGenomeData",
|
||||
"properties":{
|
||||
|
||||
"cachePath":"/yqd/test/microbe/"
|
||||
}
|
||||
},{
|
||||
"uuid":"4444",
|
||||
|
@ -25,9 +25,9 @@
|
|||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "yqd_ncbi_microbe_genome_index_delet",
|
||||
"es_type": "yqd_ncbi_microbe_genome_type_delet"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_19",
|
||||
"es_type": "test_19"
|
||||
}
|
||||
}
|
||||
],
|
|
@@ -0,0 +1,50 @@
{
"flow":{
"name":"test",
"uuid":"1234",
"stops":[
{
"uuid":"1111",
"name":"SelectFilesByName",
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
"properties":{
"HDFSUrl":"hdfs://10.0.88.70:9000",
"HDFSPath":"/yqd/test/pfam/",
"selectionConditions":".*full"
}
},
{
"uuid":"3333",
"name":"PfamData",
"bundle":"cn.piflow.bundle.microorganism.PfamData",
"properties":{
"cachePath":"/yqd/test/pfam/"
}
},
{
"uuid":"4444",
"name":"putEs",
"bundle":"cn.piflow.bundle.es.PutEs",
"properties":{
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
"es_port": "9200",
"es_index": "test_15",
"es_type": "test_15"
}
}
],
"paths":[
{
"from":"SelectFilesByName",
"outport":"",
"inport":"",
"to":"PfamData"
},{
"from":"PfamData",
"outport":"",
"inport":"",
"to":"putEs"
}
]
}
}

[15 binary image files were deleted in this merge; the diff viewer showed only their former width/height/size (5.2–10 KiB each).]

@ -7,29 +7,29 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://10.0.88.70:9000",
|
||||
"HDFSPath":"/yqd/weishengwu/refseq/",
|
||||
"selectionConditions":".*genomic.gbff.gz"
|
||||
"HDFSPath":"/yqd/ftp/refseq/",
|
||||
"selectionConditions":"archaea.4.genomic.gbff.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS_1",
|
||||
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS_1",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"fileType":"gz",
|
||||
"unzipPath":""
|
||||
|
||||
"hdfsUrl":"hdfs://10.0.88.70:9000",
|
||||
"savePath":"/yqd/test/refseq/"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"Refseq_genomeParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.Refseq_genomeParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.RefseqData",
|
||||
"properties":{
|
||||
"cachePath":"/yqd/test/refseq/"
|
||||
}
|
||||
},{
|
||||
"uuid":"4444",
|
||||
|
@ -37,9 +37,9 @@
|
|||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "genome",
|
||||
"es_type": "archaea"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_20",
|
||||
"es_type": "test_20"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
@ -48,10 +48,10 @@
|
|||
"from":"SelectFilesByName",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"UnzipFilesOnHDFS_1"
|
||||
"to":"UnzipFilesOnHDFS"
|
||||
},
|
||||
{
|
||||
"from":"UnzipFilesOnHDFS_1",
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"Refseq_genomeParser"
|
|
@@ -1,37 +0,0 @@
{
"flow":{
"name":"test",
"uuid":"1234",
"stops":[
{
"uuid":"0000",
"name":"SelectFilesByName",
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
"properties":{
"HDFSUrl":"hdfs://10.0.88.70:9000",
"HDFSPath":"/yqd/",
"selectionConditions":".*genomic.gbff.gz"
}
},{
"uuid":"1111",
"name":"UnzipFilesOnHDFS_1",
"bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS_1",
"properties":{
"isCustomize":"true",
"filePath":"hdfs://10.0.88.70:9000/yqd/archaea.1.genomic.gbff.gz",
"fileType":"gz",
"unzipPath":"hdfs://10.0.88.70:9000/yqd/weishengwu/"

}
}
],
"paths":[
{
"from":"SelectFilesByName",
"outport":"",
"inport":"",
"to":"UnzipFilesOnHDFS_1"
}
]
}
}

@ -6,18 +6,29 @@
|
|||
{
|
||||
"uuid":"1111",
|
||||
"name":"SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.ftp.SelectFilesByName",
|
||||
"bundle":"cn.piflow.bundle.hdfs.SelectFilesByName",
|
||||
"properties":{
|
||||
"HDFSUrl":"hdfs://10.0.88.70:9000",
|
||||
"HDFSPath":"/yqd/weishengwu/Swissprot_TrEMBL/",
|
||||
"selectionConditions":"uniprot_sprot.dat"
|
||||
"HDFSPath":"/yqd/ftp/swiss/",
|
||||
"selectionConditions":".*trembl.dat.gz"
|
||||
}
|
||||
},{
|
||||
"uuid":"3333",
|
||||
"name":"Swissprot_TrEMBLDataParser",
|
||||
"bundle":"cn.piflow.bundle.microorganism.Swissprot_TrEMBLDataParser",
|
||||
"uuid":"2222",
|
||||
"name":"UnzipFilesOnHDFS",
|
||||
"bundle":"cn.piflow.bundle.hdfs.UnzipFilesOnHDFS",
|
||||
"properties":{
|
||||
|
||||
"isCustomize":"false",
|
||||
"filePath":"",
|
||||
"hdfsUrl":"hdfs://10.0.88.70:9000",
|
||||
"savePath":"/yqd/test/swiss/"
|
||||
}
|
||||
},
|
||||
{
|
||||
"uuid":"3333",
|
||||
"name":"SwissprotData",
|
||||
"bundle":"cn.piflow.bundle.microorganism.SwissprotData",
|
||||
"properties":{
|
||||
"cachePath":"/yqd/test/swiss/"
|
||||
}
|
||||
},{
|
||||
"uuid":"4444",
|
||||
|
@ -25,9 +36,9 @@
|
|||
"bundle":"cn.piflow.bundle.es.PutEs",
|
||||
"properties":{
|
||||
"es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72",
|
||||
"port": "9200",
|
||||
"es_index": "Swissprot_TrEMBL_index",
|
||||
"es_type": "Swissprot_TrEMBL_type"
|
||||
"es_port": "9200",
|
||||
"es_index": "test_18",
|
||||
"es_type": "test_18"
|
||||
}
|
||||
}
|
||||
],
|
||||
|
@ -36,9 +47,15 @@
|
|||
"from":"SelectFilesByName",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"Swissprot_TrEMBLDataParser"
|
||||
"to":"UnzipFilesOnHDFS"
|
||||
},
|
||||
{
|
||||
"from":"UnzipFilesOnHDFS",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"SwissprotData"
|
||||
},{
|
||||
"from":"Swissprot_TrEMBLDataParser",
|
||||
"from":"SwissprotData",
|
||||
"outport":"",
|
||||
"inport":"",
|
||||
"to":"PutEs"
|
||||
|
|
|
@@ -48,10 +48,14 @@ class FetchEs extends ConfigurableStop {

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes").defaultValue("Node of Elasticsearch").required(true)
val es_port = new PropertyDescriptor().name("es_port").displayName("es_port").defaultValue("Port of Elasticsearch").required(true)
val es_index = new PropertyDescriptor().name("es_index").displayName("es_index").defaultValue("Index of Elasticsearch").required(true)
val es_type = new PropertyDescriptor().name("es_type").displayName("es_type").defaultValue("Type of Elasticsearch").required(true)
val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes")
.description("Node of Elasticsearch").defaultValue("").required(true)
val es_port = new PropertyDescriptor().defaultValue("9200").name("es_port").displayName("es_port")
.description("Port of Elasticsearch").required(true)
val es_index = new PropertyDescriptor().name("es_index").displayName("es_index")
.description("Index of Elasticsearch").defaultValue("").required(true)
val es_type = new PropertyDescriptor().name("es_type").displayName("es_type")
.description("Type of Elasticsearch").defaultValue("").required(true)

descriptor = es_nodes :: descriptor

@@ -47,11 +47,14 @@ class PutEs extends ConfigurableStop {

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes").defaultValue("Node of Elasticsearch").required(true)
val es_port = new PropertyDescriptor().name("es_port").displayName("es_port").defaultValue("Port of Elasticsearch").required(true)
val es_index = new PropertyDescriptor().name("es_index").displayName("es_index").defaultValue("Index of Elasticsearch").required(true)
val es_type = new PropertyDescriptor().name("es_type").displayName("es_type").defaultValue("Type of Elasticsearch").required(true)

val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes")
.description("Node of Elasticsearch").defaultValue("").required(true)
val es_port = new PropertyDescriptor().defaultValue("9200").name("es_port").displayName("es_port")
.description("Port of Elasticsearch").required(true)
val es_index = new PropertyDescriptor().name("es_index").displayName("es_index")
.description("Index of Elasticsearch").defaultValue("").required(true)
val es_type = new PropertyDescriptor().name("es_type").displayName("es_type")
.description("Type of Elasticsearch").defaultValue("").required(true)

descriptor = es_nodes :: descriptor
descriptor = es_port :: descriptor

@@ -50,16 +50,22 @@ class QueryEs extends ConfigurableStop {

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes").defaultValue("Node of Elasticsearch").required(true)
val es_port = new PropertyDescriptor().name("es_port").displayName("es_port").defaultValue("Port of Elasticsearch").required(true)
val es_index = new PropertyDescriptor().name("es_index").displayName("es_index").defaultValue("Index of Elasticsearch").required(true)
val es_type = new PropertyDescriptor().name("es_type").displayName("es_type").defaultValue("Type of Elasticsearch").required(true)

val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes")
.description("Node of Elasticsearch").defaultValue("").required(true)
val es_port = new PropertyDescriptor().defaultValue("9200").name("es_port").displayName("es_port")
.description("Port of Elasticsearch").required(true)
val es_index = new PropertyDescriptor().name("es_index").displayName("es_index")
.description("Index of Elasticsearch").defaultValue("").required(true)
val es_type = new PropertyDescriptor().name("es_type").displayName("es_type")
.description("Type of Elasticsearch").defaultValue("").required(true)
val jsonDSL = new PropertyDescriptor().name("jsonDSL").displayName("jsonDSL")
.description("DSL of Elasticsearch").defaultValue("").required(true)

descriptor = es_nodes :: descriptor
descriptor = es_port :: descriptor
descriptor = es_index :: descriptor
descriptor = es_type :: descriptor
descriptor = jsonDSL :: descriptor

descriptor
}

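The three Elasticsearch stops above (FetchEs, PutEs, QueryEs) get the same refactor: example text moves out of defaultValue into description, and es_port gains a usable default of 9200. A minimal sketch of the resulting builder usage, with names copied from the diff (an illustration, not the canonical API):

import cn.piflow.conf.bean.PropertyDescriptor

// Build a descriptor list the way the refactored getPropertyDescriptor() methods do:
// human-readable hints live in description, defaultValue holds a value that actually works.
def esDescriptorsSketch(): List[PropertyDescriptor] = {
  var descriptor: List[PropertyDescriptor] = List()
  val es_nodes = new PropertyDescriptor().name("es_nodes").displayName("es_nodes")
    .description("Node of Elasticsearch").defaultValue("").required(true)
  val es_port = new PropertyDescriptor().name("es_port").displayName("es_port")
    .description("Port of Elasticsearch").defaultValue("9200").required(true)
  descriptor = es_nodes :: descriptor
  descriptor = es_port :: descriptor
  descriptor
}
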
@@ -14,39 +14,67 @@ import org.apache.hadoop.fs.Path
class DeleteHdfs extends ConfigurableStop{
override val authorEmail: String = "ygang@cmic.com"

override val inportList: List[String] = List(PortEnum.NonePort.toString)
override val outportList: List[String] = List(PortEnum.NonePort.toString)
override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
override val description: String = "Delete file or directory on hdfs"

var hdfsUrl :String= _
var deletePath :String = _
var isCustomize:String=_
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {

val spark = pec.get[SparkSession]()

val array = deletePath.split(",")
if (isCustomize.equals("false")){
val inDf = in.read()

for (i<- 0 until array.length){
val hdfsPath = hdfsUrl+array(i)
val path = new Path(array(i))
val configuration: Configuration = new Configuration()
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
val pathARR: Array[String] = pathStr.split("\\/")
var hdfsUrl:String=""
for (x <- (0 until 3)){
hdfsUrl+=(pathARR(x) +"/")
}
configuration.set("fs.defaultFS",hdfsUrl)

val config = new Configuration()
config.set("fs.defaultFS",hdfsUrl)
val fs = FileSystem.get(config)
var fs: FileSystem = FileSystem.get(configuration)

fs.delete(path,true)
inDf.collect.foreach(row=>{
pathStr = row.get(0).asInstanceOf[String]
val path = new Path(pathStr)
fs.delete(path,true)
})

} else {
val array = deletePath.split(",")

for (i<- 0 until array.length){
val hdfsPath = hdfsUrl+"/"+array(i)
val path = new Path(hdfsPath)

val config = new Configuration()
config.set("fs.defaultFS",hdfsUrl)
val fs = FileSystem.get(config)

fs.delete(path,true)
}
}
}
override def setProperties(map: Map[String, Any]): Unit = {
hdfsUrl = MapUtil.get(map,key="hdfsUrl").asInstanceOf[String]
deletePath = MapUtil.get(map,key="deletePath").asInstanceOf[String]
isCustomize=MapUtil.get(map,key="isCustomize").asInstanceOf[String]
}

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val hdfsUrl = new PropertyDescriptor().name("hdfsUrl").displayName("hdfsUrl").defaultValue("").required(true)
val deletePath = new PropertyDescriptor().name("deletePath").displayName("deletePath").defaultValue("").required(true)
val isCustomize = new PropertyDescriptor().name("isCustomize").displayName("isCustomize").description("Whether to customize the compressed file path, if true, " +
"you must specify the path where the compressed file is located . " +
"If it is false, it will automatically find the file path data from the upstream port ")
.defaultValue("false").allowableValues(Set("true","false")).required(true)
descriptor = isCustomize :: descriptor
descriptor = hdfsUrl :: descriptor
descriptor = deletePath :: descriptor
descriptor

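In the new non-customized branch, DeleteHdfs derives the fs.defaultFS URL from the first three segments of an incoming path and then deletes each row's path recursively. A condensed, self-contained sketch of that logic using only the Hadoop FileSystem API already imported in this file (a hypothetical helper, not the class itself):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Derive "hdfs://host:port/" from a full path such as
// "hdfs://10.0.88.70:9000/yqd/test/pdb/part-00000", then delete that path recursively.
def deleteRecursivelySketch(fullPath: String): Unit = {
  val parts = fullPath.split("/")                  // Array("hdfs:", "", "host:port", "yqd", ...)
  val hdfsUrl = parts.take(3).mkString("/") + "/"  // "hdfs://host:port/"
  val conf = new Configuration()
  conf.set("fs.defaultFS", hdfsUrl)
  val fs = FileSystem.get(conf)
  fs.delete(new Path(fullPath), true)              // true = delete directories recursively
}
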
@@ -10,7 +10,7 @@ import org.apache.spark.sql.SparkSession
class GetHdfs extends ConfigurableStop{
override val authorEmail: String = "ygang@cmic.com"
override val description: String = "Get data from hdfs"
override val inportList: List[String] = List(PortEnum.NonePort.toString)
override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

var hdfsUrl : String=_

@@ -45,7 +45,7 @@ class GetHdfs extends ConfigurableStop{
}
else {
val rdd = sc.textFile(path)
val outDf = rdd.toDF("txt")
val outDf = rdd.toDF()
outDf.schema.printTreeString()
//outDf.show()
out.write(outDf)

@@ -60,9 +60,13 @@ class GetHdfs extends ConfigurableStop{

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val hdfsPath = new PropertyDescriptor().name("hdfsPath").displayName("hdfsPath").defaultValue("").required(true)
val hdfsUrl = new PropertyDescriptor().name("hdfsUrl").displayName("hdfsUrl").defaultValue("").required(true)
val types = new PropertyDescriptor().name("types").displayName("types").defaultValue("txt,parquet,csv,json").required(true)
val hdfsPath = new PropertyDescriptor().name("hdfsPath").displayName("hdfsPath")
.defaultValue("").required(true)
val hdfsUrl = new PropertyDescriptor().name("hdfsUrl").displayName("hdfsUrl")
.defaultValue("").required(true)
val types = new PropertyDescriptor().name("types").displayName("types").description("txt,parquet,csv,json")
.defaultValue("txt").allowableValues(Set("txt","parquet","csv","json")).required(true)

descriptor = types :: descriptor
descriptor = hdfsPath :: descriptor
descriptor = hdfsUrl :: descriptor

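For the plain-text branch, the hunk above drops the explicit column name: rdd.toDF("txt") becomes rdd.toDF(), so the single column now uses Spark's default name "value". A minimal spark-shell style sketch of that read path under an assumed session; the HDFS path is illustrative, taken from the flows earlier in this diff:

import org.apache.spark.sql.SparkSession

// Read a text file from HDFS into a one-column DataFrame, mirroring the "txt" branch of GetHdfs.
val spark = SparkSession.builder().appName("GetHdfsSketch").master("local[*]").getOrCreate()
import spark.implicits._

val path = "hdfs://10.0.88.70:9000/yqd/test/gene/gene_info"  // illustrative HDFS path
val rdd = spark.sparkContext.textFile(path)
val outDf = rdd.toDF()          // default column name is "value"
outDf.schema.printTreeString()
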
@ -6,38 +6,54 @@ import cn.piflow.conf.util.{ImageUtil, MapUtil}
|
|||
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
|
||||
import org.apache.spark.sql. SparkSession
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
||||
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
|
||||
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
|
||||
|
||||
class ListHdfs extends ConfigurableStop{
|
||||
override val authorEmail: String = "ygang@cmic.com"
|
||||
|
||||
override val inportList: List[String] = List(PortEnum.NonePort.toString)
|
||||
override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
override val description: String = "Retrieve a list of files from hdfs"
|
||||
|
||||
var hdfsPath :String= _
|
||||
var hdfsUrl :String= _
|
||||
var list = List("")
|
||||
var HDFSPath :String= _
|
||||
var HDFSUrl :String= _
|
||||
var pathARR:ArrayBuffer[String]=ArrayBuffer()
|
||||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
val spark = pec.get[SparkSession]()
|
||||
val sc = spark.sparkContext
|
||||
|
||||
val path = new Path(hdfsPath)
|
||||
|
||||
val path = new Path(HDFSPath)
|
||||
iterationFile(path.toString)
|
||||
|
||||
import spark.implicits._
|
||||
|
||||
val outDF = sc.parallelize(list).toDF()
|
||||
val rows: List[Row] = pathARR.map(each => {
|
||||
var arr:Array[String]=Array(each)
|
||||
val row: Row = Row.fromSeq(arr)
|
||||
row
|
||||
}).toList
|
||||
|
||||
val rowRDD: RDD[Row] = sc.makeRDD(rows)
|
||||
// val fields: Array[StructField] = "path".split("/").map(d=>StructField(d,StringType,nullable = true))
|
||||
// val schema: StructType = StructType(fields)
|
||||
|
||||
val schema: StructType = StructType(Array(
|
||||
StructField("path",StringType)
|
||||
))
|
||||
val outDF: DataFrame = spark.createDataFrame(rowRDD,schema)
|
||||
out.write(outDF)
|
||||
}
|
||||
|
||||
// recursively traverse the folder
|
||||
def iterationFile(path: String):Unit = {
|
||||
val config = new Configuration()
|
||||
config.set("fs.defaultFS",hdfsUrl)
|
||||
config.set("fs.defaultFS",HDFSUrl)
|
||||
val fs = FileSystem.get(config)
|
||||
val listf = new Path(path)
|
||||
|
||||
|
@ -45,28 +61,25 @@ class ListHdfs extends ConfigurableStop{
|
|||
|
||||
for (f <- statuses) {
|
||||
val fsPath = f.getPath().toString
|
||||
//println(fsPath)
|
||||
|
||||
if (f.isDirectory) {
|
||||
list = fsPath::list
|
||||
// pathARR += fsPath
|
||||
iterationFile(fsPath)
|
||||
|
||||
} else{
|
||||
list = f.getPath.toString::list
|
||||
pathARR += f.getPath.toString
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
hdfsUrl = MapUtil.get(map,key="hdfsUrl").asInstanceOf[String]
|
||||
hdfsPath = MapUtil.get(map,key="hdfsPath").asInstanceOf[String]
|
||||
HDFSUrl = MapUtil.get(map,key="HDFSUrl").asInstanceOf[String]
|
||||
HDFSPath = MapUtil.get(map,key="HDFSPath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val hdfsPath = new PropertyDescriptor().name("hdfsPath").displayName("hdfsPath").defaultValue("").required(true)
|
||||
val hdfsUrl = new PropertyDescriptor().name("hdfsUrl").displayName("hdfsUrl").defaultValue("").required(true)
|
||||
val hdfsPath = new PropertyDescriptor().name("HDFSPath").displayName("HDFSPath").defaultValue("").required(true)
|
||||
val hdfsUrl = new PropertyDescriptor().name("HDFSUrl").displayName("HDFSUrl").defaultValue("").required(true)
|
||||
descriptor = hdfsPath :: descriptor
|
||||
descriptor = hdfsUrl :: descriptor
|
||||
descriptor
|
||||
|
|
|
@@ -13,7 +13,7 @@ import org.apache.spark.sql.SparkSession
class PutHdfs extends ConfigurableStop{
override val authorEmail: String = "ygang@cmic.com"
override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.NonePort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
override val description: String = "Put data to hdfs"

var hdfsPath :String= _

@ -17,7 +17,7 @@ import scala.collection.mutable.ArrayBuffer
|
|||
class SelectFilesByName extends ConfigurableStop{
|
||||
override val authorEmail: String = "yangqidong@cnic.cn"
|
||||
override val description: String = "Select files by file name"
|
||||
override val inportList: List[String] = List(PortEnum.NonePort.toString)
|
||||
override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
|
||||
var HDFSUrl:String=_
|
||||
|
@ -68,10 +68,11 @@ class SelectFilesByName extends ConfigurableStop{
|
|||
val schema: StructType = StructType(fields)
|
||||
val df: DataFrame = session.createDataFrame(rowRDD,schema)
|
||||
|
||||
|
||||
println("#################################################")
|
||||
df.show(20)
|
||||
println("#################################################")
|
||||
println(df.count+"#################################################")
|
||||
|
||||
|
||||
|
||||
out.write(df)
|
||||
}
|
||||
|
@ -85,9 +86,12 @@ class SelectFilesByName extends ConfigurableStop{
|
|||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val HDFSUrl = new PropertyDescriptor().name("HDFSUrl").displayName("HDFSUrl").defaultValue("The URL of the HDFS file system, such as hdfs://10.0.88.70:9000").required(true)
|
||||
val HDFSPath = new PropertyDescriptor().name("HDFSPath").displayName("HDFSPath").defaultValue("The save path of the HDFS file system, such as /test/Ab").required(true)
|
||||
val selectionConditions = new PropertyDescriptor().name("selectionConditions").displayName("selectionConditions").defaultValue("To select conditions, you need to fill in regular expressions in java, such as. * abc. *").required(true)
|
||||
val HDFSUrl = new PropertyDescriptor().name("HDFSUrl").displayName("HDFSUrl").description("The URL of the HDFS file system, such as hdfs://ip:port")
|
||||
.defaultValue("hdfs://").required(true)
|
||||
val HDFSPath = new PropertyDescriptor().name("HDFSPath").displayName("HDFSPath").description("The save path of the HDFS file system, such as /test/Ab")
|
||||
.defaultValue("").required(true)
|
||||
val selectionConditions = new PropertyDescriptor().name("selectionConditions").displayName("selectionConditions").description("To select conditions, you need to fill in regular expressions in java, such as. * abc. *")
|
||||
.defaultValue("").required(true)
|
||||
descriptor = HDFSUrl :: descriptor
|
||||
descriptor = HDFSPath :: descriptor
|
||||
descriptor = selectionConditions :: descriptor
|
||||
|
|
|
@ -186,7 +186,7 @@ class UnzipFilesOnHDFS extends ConfigurableStop {
|
|||
val isCustomize = new PropertyDescriptor().name("isCustomize").displayName("isCustomize").description("Whether to customize the compressed file path, if true, " +
|
||||
"you must specify the path where the compressed file is located . " +
|
||||
"If it is false, it will automatically find the file path data from the upstream port ")
|
||||
.defaultValue("").required(false)
|
||||
.defaultValue("false").allowableValues(Set("true","false")).required(true)
|
||||
descriptor = isCustomize :: descriptor
|
||||
descriptor = filePath :: descriptor
|
||||
descriptor = hdfsUrl :: descriptor
|
||||
|
|
|
@ -41,17 +41,15 @@ class BioProjetData extends ConfigurableStop{
|
|||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/bioprojectCatch/bioprojectCatch.json"
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/bioprojectCache/bioprojectCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
fs.create(path).close()
|
||||
var fdosOut: FSDataOutputStream = fs.append(path)
|
||||
var jsonStr: String =""
|
||||
var bisIn: BufferedInputStream =null
|
||||
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
inDf.collect().foreach(row =>{
|
||||
val pathStr = row.get(0).asInstanceOf[String]
|
||||
|
@ -64,23 +62,20 @@ class BioProjetData extends ConfigurableStop{
|
|||
br.readLine()
|
||||
i = i + 1
|
||||
}
|
||||
var count = 0
|
||||
|
||||
var xml = new StringBuffer()
|
||||
var x = 0
|
||||
var count = 0
|
||||
while ((line = br.readLine()) != null && x <1 && line!= null ) {
|
||||
|
||||
xml.append(line)
|
||||
if (line.equals("</PackageSet>")){
|
||||
println("----------------------------------break")
|
||||
println("break")
|
||||
x == 1
|
||||
} else if (line.indexOf("</" + name + ">") != -1){ //reach the end of a doc
|
||||
count = count + 1
|
||||
val doc: JSONObject = XML.toJSONObject(xml.toString()).getJSONObject(name)
|
||||
|
||||
val accession = doc.getJSONObject("Project").getJSONObject("Project")
|
||||
.getJSONObject("ProjectID")
|
||||
.getJSONObject("ArchiveID")
|
||||
.getString("accession")
|
||||
|
||||
val projectDescr = doc.getJSONObject("Project").getJSONObject("Project")
|
||||
.getJSONObject("ProjectDescr")
|
||||
|
||||
|
@ -115,7 +110,7 @@ class BioProjetData extends ConfigurableStop{
|
|||
}
|
||||
|
||||
// ----------------3
|
||||
if(doc.getJSONObject("Project").optJSONObject("Submission") != null){
|
||||
if(doc.getJSONObject("Project").optJSONObject("Submission") != null){
|
||||
val submission = doc.getJSONObject("Project").optJSONObject("Submission")
|
||||
|
||||
if(submission.opt("submitted") != null){
|
||||
|
@ -166,27 +161,19 @@ class BioProjetData extends ConfigurableStop{
|
|||
}
|
||||
}
|
||||
|
||||
bisIn = new BufferedInputStream(new ByteArrayInputStream((doc.toString+"\n").getBytes()))
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var num: Int = bisIn.read(buff)
|
||||
while (num != -1) {
|
||||
fdosOut.write(buff, 0, num)
|
||||
fdosOut.flush()
|
||||
num = bisIn.read(buff)
|
||||
}
|
||||
fdosOut.flush()
|
||||
bisIn = null
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
xml = new StringBuffer()
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
|
||||
fdosOut.close()
|
||||
|
||||
hdfsWriter.close()
|
||||
println("start parser HDFSjsonFile")
|
||||
val df: DataFrame = spark.read.json(hdfsPathTemporary)
|
||||
out.write(df)
|
||||
|
@ -199,7 +186,8 @@ class BioProjetData extends ConfigurableStop{
|
|||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").defaultValue("").required(true)
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/bioproject").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@ class BioSample extends ConfigurableStop{
|
|||
def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
val spark = pec.get[SparkSession]()
|
||||
val sc = spark.sparkContext
|
||||
// val ssc = spark.sqlContext
|
||||
val inDf= in.read()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
|
@ -35,27 +34,22 @@ class BioSample extends ConfigurableStop{
|
|||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
|
||||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
var hdfsPathJsonCache:String = ""
|
||||
|
||||
hdfsPathJsonCache = hdfsUrl+cachePath+"/biosampleCache/biosampleCache.json"
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/biosampleCache/biosampleCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathJsonCache)
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
fs.create(path).close()
|
||||
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var fdosOut: FSDataOutputStream = null
|
||||
var bisIn: BufferedInputStream =null
|
||||
fdosOut = fs.append(path)
|
||||
|
||||
var count = 0
|
||||
var nameNum = 0
|
||||
inDf.collect().foreach(row => {
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
|
||||
|
@ -66,8 +60,7 @@ class BioSample extends ConfigurableStop{
|
|||
br.readLine()
|
||||
br.readLine()
|
||||
|
||||
|
||||
while ((line = br.readLine()) != null && line!= null) {
|
||||
while ((line = br.readLine()) != null && line!= null ) {
|
||||
xml = xml + line
|
||||
if (line.indexOf("</" + docName + ">") != -1) {
|
||||
count = count + 1
|
||||
|
@ -79,7 +72,6 @@ class BioSample extends ConfigurableStop{
|
|||
if (attrs.equals("")) {
|
||||
doc.remove("Attributes")
|
||||
}
|
||||
|
||||
// Links
|
||||
val links: String = doc.optString("Links")
|
||||
if (links != null) {
|
||||
|
@ -123,36 +115,24 @@ class BioSample extends ConfigurableStop{
|
|||
if (jsonDoc.contains("Ids\":{\"Id\":[{")){
|
||||
|
||||
} else {
|
||||
// println(count)
|
||||
// println(jsonDoc)
|
||||
jsonDoc = jsonDoc.replace("Ids\":{\"Id\":{","Ids\":{\"Id\":[{")
|
||||
}
|
||||
}
|
||||
|
||||
bisIn = new BufferedInputStream(new ByteArrayInputStream((jsonDoc+"\n").getBytes()))
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var num: Int = bisIn.read(buff)
|
||||
while (num != -1) {
|
||||
fdosOut.write(buff, 0, num)
|
||||
fdosOut.flush()
|
||||
num = bisIn.read(buff)
|
||||
}
|
||||
fdosOut.flush()
|
||||
bisIn = null
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
xml = ""
|
||||
}
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
hdfsWriter.close()
|
||||
|
||||
// bisIn.close()
|
||||
fdosOut.close()
|
||||
println("start parser HDFSjsonFile")
|
||||
|
||||
val df: DataFrame = spark.read.json(hdfsPathJsonCache)
|
||||
|
||||
val df: DataFrame = spark.read.json(hdfsPathTemporary)
|
||||
out.write(df)
|
||||
}
|
||||
|
||||
|
@ -164,7 +144,8 @@ class BioSample extends ConfigurableStop{
|
|||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").defaultValue("").required(true)
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/biosample").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.io._
|
|||
|
||||
import cn.piflow.bundle.microorganism.util.{CustomIOTools, Process}
|
||||
import cn.piflow.conf.bean.PropertyDescriptor
|
||||
import cn.piflow.conf.util.ImageUtil
|
||||
import cn.piflow.conf.util.{ImageUtil, MapUtil}
|
||||
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
|
||||
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
|
@ -20,13 +20,12 @@ class EmblData extends ConfigurableStop{
|
|||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
|
||||
|
||||
|
||||
var cachePath:String = _
|
||||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
val session = pec.get[SparkSession]()
|
||||
|
||||
val inDf: DataFrame = in.read()
|
||||
val configuration: Configuration = new Configuration()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
var hdfsUrl:String=""
|
||||
|
@ -36,85 +35,56 @@ class EmblData extends ConfigurableStop{
|
|||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/Refseq_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/emblDataCache/emblDataCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
|
||||
var jsonStr: String =""
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
var doc: JSONObject = null
|
||||
var seq: RichSequence = null
|
||||
var br: BufferedReader = null
|
||||
var sequences: RichSequenceIterator = null
|
||||
var count : Int =0
|
||||
|
||||
inDf.collect().foreach(row => {
|
||||
|
||||
var n : Int =0
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
var fdis: FSDataInputStream = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
|
||||
var fdis: FSDataInputStream = fs.open(new Path(pathStr))
|
||||
sequences = CustomIOTools.IOTools.readEMBLDNA(br, null)
|
||||
while (sequences.hasNext) {
|
||||
count += 1
|
||||
seq = sequences.nextRichSequence()
|
||||
doc = new JSONObject
|
||||
Process.processEMBL_EnsemblSeq(seq, doc)
|
||||
|
||||
var br: BufferedReader = new BufferedReader(new InputStreamReader(fdis))
|
||||
|
||||
var sequences: RichSequenceIterator = CustomIOTools.IOTools.readEMBLDNA (br, null)
|
||||
|
||||
while (sequences.hasNext) {
|
||||
n += 1
|
||||
var seq: RichSequence = sequences.nextRichSequence()
|
||||
var doc: JSONObject = new JSONObject
|
||||
Process.processEMBL_EnsemblSeq(seq, doc)
|
||||
jsonStr = doc.toString
|
||||
println("start " + n)
|
||||
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
|
||||
fdos.flush()
|
||||
bis = null
|
||||
seq = null
|
||||
doc = null
|
||||
}
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
})
|
||||
|
||||
fdos.close()
|
||||
|
||||
val df: DataFrame = session.read.json(hdfsPathTemporary)
|
||||
|
||||
out.write(df)
|
||||
|
||||
}
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] ={
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/emblData").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
package cn.piflow.bundle.microorganism
|
||||
|
||||
import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, InputStreamReader}
|
||||
import java.io._
|
||||
|
||||
import cn.piflow.bundle.microorganism.util.ParserGff3Data
|
||||
import cn.piflow.conf.bean.PropertyDescriptor
|
||||
import cn.piflow.conf.util.ImageUtil
|
||||
import cn.piflow.conf.util.{ImageUtil, MapUtil}
|
||||
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
|
||||
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
|
@ -19,12 +19,15 @@ class Ensembl extends ConfigurableStop{
|
|||
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
|
||||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
var cachePath:String = _
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] ={
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/ensembl").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
@ -43,82 +46,55 @@ class Ensembl extends ConfigurableStop{
|
|||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
|
||||
val session = pec.get[SparkSession]()
|
||||
|
||||
val inDf: DataFrame = in.read()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
var pathStr: String = ""
|
||||
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
var hdfsUrl:String=""
|
||||
try{
|
||||
pathStr =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
}catch {
|
||||
case e:Exception => throw new Exception("Path error")
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
|
||||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/ensembl_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/ensemblCache/ensemblCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
val parser: ParserGff3Data = new ParserGff3Data
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
var fdis: FSDataInputStream =null
|
||||
var br: BufferedReader = null
|
||||
var sequences: RichSequenceIterator = null
|
||||
var doc: JSONObject = null
|
||||
var seq: RichSequence = null
|
||||
var jsonStr: String = ""
|
||||
var count:Int = 0
|
||||
inDf.collect().foreach(row => {
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
println("start parser ^^^" + pathStr)
|
||||
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
var eachStr:String=null
|
||||
|
||||
while((eachStr = br.readLine()) != null && eachStr != null ){
|
||||
doc = parser.parserGff3(eachStr)
|
||||
jsonStr = doc.toString
|
||||
if(jsonStr.length > 2){
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream((jsonStr+"\n").getBytes()))
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
|
||||
bis.close()
|
||||
bis = null
|
||||
doc = null
|
||||
seq = null
|
||||
jsonStr = ""
|
||||
if(doc.toString.length > 2){
|
||||
count += 1
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
}
|
||||
|
||||
}
|
||||
sequences = null
|
||||
|
||||
br.close()
|
||||
br = null
|
||||
fdis.close()
|
||||
fdis =null
|
||||
pathStr = null
|
||||
})
|
||||
|
||||
fdos.close()
|
||||
hdfsWriter.close()
|
||||
|
||||
out.write(session.read.json(hdfsPathTemporary))
|
||||
}
|
||||
|
|
|
@ -2,15 +2,14 @@ package cn.piflow.bundle.microorganism

import java.io._

import cn.piflow.bundle.microorganism.util.{CustomIOTools, Process}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.json.JSONObject

@ -21,10 +20,10 @@ class GenBankData extends ConfigurableStop{
val outportList: List[String] = List(PortEnum.DefaultPort.toString)

var cachePath:String = _
def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
val spark = pec.get[SparkSession]()
val sc = spark.sparkContext

val inDf= in.read()

val configuration: Configuration = new Configuration()

@ -34,21 +33,18 @@ class GenBankData extends ConfigurableStop{
for (x <- (0 until 3)){
hdfsUrl+=(pathARR(x) +"/")
}

configuration.set("fs.defaultFS",hdfsUrl)
var fs: FileSystem = FileSystem.get(configuration)

val hdfsPathTemporary:String = hdfsUrl+"/microoCache/genbank/genbankcach.json"
val hdfsPathTemporary = hdfsUrl+cachePath+"/genebankCache/genebankCache.json"

val path: Path = new Path(hdfsPathTemporary)
if(fs.exists(path)){
fs.delete(path)
}
fs.create(path).close()

var fdosOut: FSDataOutputStream = fs.append(path)
var jsonStr: String =""
var bisIn: BufferedInputStream =null
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

inDf.collect().foreach(row=>{

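This cache-file handling (continued in the next hunk, which streams each parsed record into the same file) is repeated nearly verbatim in every parser stop of this merge. A self-contained sketch of the pattern under those assumptions; writeCachedJson is a hypothetical helper, while the FileSystem, OutputStreamWriter and spark.read.json calls are the ones the diff itself uses:

    // Sketch: (re)create <hdfsUrl><cachePath>/exampleCache/exampleCache.json, append one
    // JSON document per line, then load the file back as a DataFrame for the output port.
    import java.io.OutputStreamWriter
    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, Path}
    import org.apache.spark.sql.{DataFrame, SparkSession}
    import org.json.JSONObject

    object CacheSketch {
      def writeCachedJson(spark: SparkSession, hdfsUrl: String, cachePath: String,
                          docs: Iterator[JSONObject]): DataFrame = {
        val conf = new Configuration()
        conf.set("fs.defaultFS", hdfsUrl)
        val fs = FileSystem.get(conf)

        val path = new Path(hdfsUrl + cachePath + "/exampleCache/exampleCache.json")
        if (fs.exists(path)) fs.delete(path, true)   // drop any stale cache file first
        fs.create(path).close()

        val writer = new OutputStreamWriter(fs.append(path))
        docs.foreach { doc => doc.write(writer); writer.write("\n") }   // JSON Lines
        writer.close()

        spark.read.json(path.toString)   // Spark reads one JSON object per line
      }
    }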
@ -60,48 +56,35 @@ class GenBankData extends ConfigurableStop{
|
|||
|
||||
var doc: JSONObject = null
|
||||
var count = 0
|
||||
while (sequenceIterator.hasNext) {
|
||||
count += 1
|
||||
doc = new JSONObject
|
||||
while (sequenceIterator.hasNext) {
|
||||
count += 1
|
||||
doc = new JSONObject
|
||||
|
||||
val seq = sequenceIterator.nextRichSequence()
|
||||
val seq = sequenceIterator.nextRichSequence()
|
||||
Process.processSingleSequence(seq, doc)
|
||||
|
||||
Process.processSingleSequence(seq, doc)
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
|
||||
bisIn = new BufferedInputStream(new ByteArrayInputStream((doc.toString+"\n").getBytes()))
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
var num: Int = bisIn.read(buff)
|
||||
while (num != -1) {
|
||||
fdosOut.write(buff, 0, num)
|
||||
fdosOut.flush()
|
||||
num = bisIn.read(buff)
|
||||
}
|
||||
|
||||
fdosOut.flush()
|
||||
bisIn = null
|
||||
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
|
||||
fdosOut.close()
|
||||
hdfsWriter.close()
|
||||
println("start parser HDFSjsonFile")
|
||||
val df: DataFrame = spark.read.json(hdfsPathTemporary)
|
||||
|
||||
df.schema.printTreeString()
|
||||
out.write(df)
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/genbank").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
package cn.piflow.bundle.microorganism

import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, InputStreamReader}
import java.io._
import java.text.SimpleDateFormat

import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
@ -18,13 +18,13 @@ class Gene extends ConfigurableStop{
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

var cachePath:String = _
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
val session = pec.get[SparkSession]()

val inDf: DataFrame = in.read()
val configuration: Configuration = new Configuration()

val configuration: Configuration = new Configuration()
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
val pathARR: Array[String] = pathStr.split("\\/")
var hdfsUrl:String=""

@ -34,19 +34,15 @@ class Gene extends ConfigurableStop{
configuration.set("fs.defaultFS",hdfsUrl)
var fs: FileSystem = FileSystem.get(configuration)

val hdfsPathTemporary:String = hdfsUrl+"/Refseq_genomeParser_temporary.json"
val path: Path = new Path(hdfsPathTemporary)
val hdfsPathTemporary = hdfsUrl+cachePath+"/geneCache/geneCache.json"

val path: Path = new Path(hdfsPathTemporary)
if(fs.exists(path)){
fs.delete(path)
}

fs.create(path).close()
var fdos: FSDataOutputStream = fs.append(path)

var jsonStr: String =""

var bis: BufferedInputStream =null
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

var names:Array[String]=Array("tax_id", "geneID", "symbol", "locus_tag", "synonyms", "dbxrefs", "chromosome", "map_location", "description", "type_of_gene",
"symbol_from_nomenclature_authority", "full_name_from_nomenclature_authority",

@ -54,7 +50,7 @@ class Gene extends ConfigurableStop{
val format: java.text.DateFormat = new SimpleDateFormat("yyyyMMdd").asInstanceOf[java.text.DateFormat]
val newFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")

var n:Int=0
var count:Int=0
inDf.collect().foreach(row => {

pathStr = row.get(0).asInstanceOf[String]

@ -66,9 +62,9 @@ class Gene extends ConfigurableStop{
var line:String=""
var doc:JSONObject=null

while ((line=br.readLine()) != null){
while ((line=br.readLine()) != null && count < 10 ){
if( ! line.startsWith("#")){
n += 1
count += 1
doc=new JSONObject()
val tokens: Array[String] = line.split("\\\t")
for(i <- (0 until 15)){

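The hunk above (and the one that follows) walks a tab-delimited gene_info file line by line and turns each record into a JSON document keyed by the names array. A simplified, self-contained sketch of that per-line step; lineToDoc is a hypothetical helper, the shortened names array and the date formats are taken from the diff:

    // Sketch: one tab-separated gene_info record -> one JSONObject.
    import java.text.SimpleDateFormat
    import org.json.JSONObject

    object GeneLineSketch {
      private val names  = Array("tax_id", "geneID", "symbol", "locus_tag", "synonyms")
      private val inFmt  = new SimpleDateFormat("yyyyMMdd")
      private val outFmt = new SimpleDateFormat("yyyy-MM-dd")

      def lineToDoc(line: String): Option[JSONObject] = {
        if (line.startsWith("#")) return None        // skip header/comment lines
        val tokens = line.split("\t")
        val doc = new JSONObject()
        for (i <- names.indices if i < tokens.length) {
          doc.put(names(i), tokens(i))               // positional column -> named field
        }
        // date-typed columns are reformatted yyyyMMdd -> yyyy-MM-dd in the real stop, e.g.
        // doc.put("Modification_date", outFmt.format(inFmt.parse(tokens.last)))
        Some(doc)
      }
    }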
@ -84,54 +80,28 @@ class Gene extends ConfigurableStop{
|
|||
doc.put(names(i),newFormat.format(format.parse(tokens(i))))
|
||||
}
|
||||
}
|
||||
jsonStr = doc.toString
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
}
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis = null
|
||||
doc = null
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
}
|
||||
}
|
||||
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
|
||||
fdos.close()
|
||||
|
||||
hdfsWriter.close()
|
||||
val df: DataFrame = session.read.json(hdfsPathTemporary)
|
||||
|
||||
out.write(df)
|
||||
|
||||
|
||||
}
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/gene").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
|
|
@ -35,21 +35,18 @@ class GoData extends ConfigurableStop{
for (x <- (0 until 3)){
hdfsUrl+=(pathARR(x) +"/")
}

configuration.set("fs.defaultFS",hdfsUrl)
var fs: FileSystem = FileSystem.get(configuration)

val hdfsPathJsonCache = hdfsUrl+cachePath+"/godataCache/godataCache.json"
val hdfsPathTemporary = hdfsUrl+cachePath+"/godataCache/godataCache.json"

val path: Path = new Path(hdfsPathJsonCache)
val path: Path = new Path(hdfsPathTemporary)
if(fs.exists(path)){
fs.delete(path)
}
fs.create(path).close()

var fdosOut: FSDataOutputStream = fs.append(path)
var jsonStr: String =""
var bisIn: BufferedInputStream =null
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

inDf.collect().foreach(row => {
pathStr = row.get(0).asInstanceOf[String]

@ -65,7 +62,7 @@ class GoData extends ConfigurableStop{
}
var obj = new JSONObject()
var count= 0
while ((line = br.readLine()) !=null && line !=null ){
while ((line = br.readLine()) !=null && line !=null && count<10){
val m: Matcher = tv.matcher(line)
if (line.startsWith("[")){
if (line .equals("[Term]")){

@ -89,28 +86,20 @@ class GoData extends ConfigurableStop{
|
|||
}
|
||||
count += 1
|
||||
|
||||
bisIn = new BufferedInputStream(new ByteArrayInputStream((obj.toString+"\n").getBytes()))
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
var num: Int = bisIn.read(buff)
|
||||
while (num != -1) {
|
||||
fdosOut.write(buff, 0, num)
|
||||
fdosOut.flush()
|
||||
num = bisIn.read(buff)
|
||||
}
|
||||
fdosOut.flush()
|
||||
bisIn = null
|
||||
|
||||
obj.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
obj= new JSONObject()
|
||||
}
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
hdfsWriter.close()
|
||||
|
||||
fdosOut.close()
|
||||
|
||||
println("start parser HDFSjsonFile")
|
||||
val df: DataFrame = spark.read.json(hdfsPathJsonCache)
|
||||
val df: DataFrame = spark.read.json(hdfsPathTemporary)
|
||||
out.write(df)
|
||||
|
||||
}
|
||||
|
@ -120,7 +109,8 @@ class GoData extends ConfigurableStop{

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").defaultValue("").required(true)
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
.defaultValue("/goData").required(true)
descriptor = cachePath :: descriptor
descriptor
}

@ -35,16 +35,15 @@ class InterproData extends ConfigurableStop{
configuration.set("fs.defaultFS",hdfsUrl)
var fs: FileSystem = FileSystem.get(configuration)

val hdfsPathJsonCache = hdfsUrl+cachePath+"/interproDataCatch/interproDataCatch.json"
val path: Path = new Path(hdfsPathJsonCache)
val hdfsPathTemporary = hdfsUrl+cachePath+"/interproCache/interproCache.json"

val path: Path = new Path(hdfsPathTemporary)
if(fs.exists(path)){
fs.delete(path)
}
fs.create(path).close()

var fdosOut: FSDataOutputStream = fs.append(path)
var jsonStr: String =""
var bisIn: BufferedInputStream =null
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

inDf.collect().foreach(row => {
pathStr = row.get(0).asInstanceOf[String]

@ -76,28 +75,20 @@ class InterproData extends ConfigurableStop{
|
|||
doc.remove("pub_list")
|
||||
}
|
||||
|
||||
|
||||
bisIn = new BufferedInputStream(new ByteArrayInputStream((doc.toString+"\n").getBytes()))
|
||||
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
var num: Int = bisIn.read(buff)
|
||||
while (num != -1) {
|
||||
fdosOut.write(buff, 0, num)
|
||||
fdosOut.flush()
|
||||
num = bisIn.read(buff)
|
||||
}
|
||||
fdosOut.flush()
|
||||
bisIn = null
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
xml = ""
|
||||
}
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
|
||||
fdosOut.close()
|
||||
hdfsWriter.close()
|
||||
|
||||
println("start parser HDFSjsonFile")
|
||||
val df: DataFrame = spark.read.json(hdfsPathJsonCache)
|
||||
val df: DataFrame = spark.read.json(hdfsPathTemporary)
|
||||
|
||||
out.write(df)
|
||||
|
||||
|
@ -109,7 +100,8 @@ class InterproData extends ConfigurableStop{

override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").defaultValue("").required(true)
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
.defaultValue("/interpro").required(true)
descriptor = cachePath :: descriptor
descriptor
}

@ -7,7 +7,7 @@ import java.util.Locale
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
@ -19,11 +19,12 @@ class MedlineData extends ConfigurableStop{
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

var cachePath:String = _

var docName = "PubmedArticle"
val formatter = new SimpleDateFormat("yyyy-MM-dd")
val format = new SimpleDateFormat("dd-MM-yyyy")
val format_english = new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH)
var docName = "PubmedArticle"
val formatter = new SimpleDateFormat("yyyy-MM-dd")
val format = new SimpleDateFormat("dd-MM-yyyy")
val format_english = new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH)

override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
val inDf: DataFrame = in.read()

@ -38,7 +39,7 @@ class MedlineData extends ConfigurableStop{
configuration.set("fs.defaultFS",hdfsUrl)
var fs: FileSystem = FileSystem.get(configuration)

val hdfsPathTemporary:String = hdfsUrl+"/yqd/test/medline/medlineAll.json"
val hdfsPathTemporary = hdfsUrl+cachePath+"/medlineCache/medlineCache.json"
val path: Path = new Path(hdfsPathTemporary)
if(fs.exists(path)){
fs.delete(path)

@ -56,27 +57,23 @@ class MedlineData extends ConfigurableStop{
|
|||
fileNum += 1
|
||||
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
println(fileNum+"-----start parser ^^^" + pathStr)
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
|
||||
var i = 0
|
||||
var eachLine: String= ""
|
||||
while (i < 3) {
|
||||
eachLine = br.readLine()
|
||||
i += 1
|
||||
}
|
||||
|
||||
var xml = ""
|
||||
while ((eachLine = br.readLine) != null && eachLine != null ) {
|
||||
|
||||
xml += eachLine
|
||||
|
||||
if (eachLine.indexOf("</"+docName+">")!= -1){
|
||||
count += 1
|
||||
|
||||
doc = XML.toJSONObject(xml).getJSONObject(docName).optJSONObject("MedlineCitation")
|
||||
println(count)
|
||||
// println(doc.toString)
|
||||
|
||||
|
||||
if (doc.optJSONObject("DateCreated") != null) {
|
||||
val dateCreated = doc.getJSONObject("DateCreated")
|
||||
|
@ -130,9 +127,6 @@ class MedlineData extends ConfigurableStop{
if (pubDate.opt("Year") != null) doc.put("PubYear", pubDate.get("Year"))

if (pubDate.opt("Year") != null && pubDate.opt("Month") != null && pubDate.opt("Day") != null) {

var month = pubDate.get("Month")
if (month.toString.contains("01") || month.toString.contains("1")) month = "Jan"
if (month.toString.contains("02") || month.toString.contains("2")) month = "Feb"

@ -147,10 +141,7 @@ class MedlineData extends ConfigurableStop{
if (month.toString.contains("11")) month = "Nov"
if (month.toString.contains("12")) month = "Dec"

val date = pubDate.get("Day") + "-" +month + "-" + pubDate.get("Year")

// println(date+"@@@@@@@@@@@")
doc.put("PubDate", formatter.format(format_english.parse(date)))
}
}

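The two hunks above normalise the citation's publication date: a numeric or abbreviated Month value is mapped onto an English month abbreviation and the dd-MMM-yyyy string is reparsed into yyyy-MM-dd. A compact equivalent sketch, not the stop's literal code; the monthMap is my own lookup table, while the two SimpleDateFormat patterns are the ones declared in this class:

    // Sketch of the PubDate normalisation used above.
    import java.text.SimpleDateFormat
    import java.util.Locale

    object PubDateSketch {
      private val formatter     = new SimpleDateFormat("yyyy-MM-dd")
      private val formatEnglish = new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH)
      private val monthMap = Map(
        "1" -> "Jan", "2" -> "Feb", "3" -> "Mar", "4"  -> "Apr", "5"  -> "May", "6"  -> "Jun",
        "7" -> "Jul", "8" -> "Aug", "9" -> "Sep", "10" -> "Oct", "11" -> "Nov", "12" -> "Dec")

      def normalise(day: String, month: String, year: String): String = {
        // "01" and "Jan" both resolve to "Jan"; unknown values pass through unchanged
        val mon = monthMap.getOrElse(month.stripPrefix("0"), month)
        formatter.format(formatEnglish.parse(s"$day-$mon-$year"))
      }
    }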
@ -169,17 +160,13 @@ class MedlineData extends ConfigurableStop{
|
|||
hdfsWriter.write("\n")
|
||||
|
||||
xml = ""
|
||||
|
||||
}
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
hdfsWriter.close()
|
||||
|
||||
val df: DataFrame = pec.get[SparkSession]().read.json(hdfsPathTemporary)
|
||||
df.schema.printTreeString()
|
||||
// println(df.count())
|
||||
out.write(df)
|
||||
|
||||
}
|
||||
|
@ -199,9 +186,7 @@ class MedlineData extends ConfigurableStop{
return findJSONObject(objKey, obj.getJSONArray(key).getJSONObject(i))
}
}

}

}
}
return null

@ -210,15 +195,17 @@ class MedlineData extends ConfigurableStop{

override def setProperties(map: Map[String, Any]): Unit = {

def setProperties(map: Map[String, Any]): Unit = {
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
}

override def getPropertyDescriptor(): List[PropertyDescriptor] ={
var descriptor = List()
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
.defaultValue("/medline").required(true)
descriptor = cachePath :: descriptor
descriptor
}

override def getIcon(): Array[Byte] = {
ImageUtil.getImage("icon/microorganism/MedlineData.png")
}

@ -1,10 +1,10 @@
package cn.piflow.bundle.microorganism

import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, InputStreamReader}
import java.io._

import cn.piflow.bundle.microorganism.util.{CustomIOTools, Process}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
@ -14,118 +14,83 @@ import org.biojavax.bio.seq.{RichSequence, RichSequenceIterator}
|
|||
import org.json.JSONObject
|
||||
|
||||
|
||||
|
||||
class MicrobeGenomeData extends ConfigurableStop{
|
||||
override val authorEmail: String = "yangqidong@cnic.cn"
|
||||
override val description: String = "Parse MicrobeGenome data"
|
||||
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
|
||||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
|
||||
var cachePath:String = _
|
||||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
val session = pec.get[SparkSession]()
|
||||
|
||||
val spark = pec.get[SparkSession]()
|
||||
val inDf: DataFrame = in.read()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
var pathStr: String = ""
|
||||
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
var hdfsUrl:String=""
|
||||
try{
|
||||
pathStr =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
}catch {
|
||||
case e:Exception => throw new Exception("Path error")
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
|
||||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/NCBI_Microbe_genome_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/microbeGenomeCache/microbeGenomeCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var fdis: FSDataInputStream =null
|
||||
var br: BufferedReader = null
|
||||
var sequences: RichSequenceIterator = null
|
||||
var doc: JSONObject = null
|
||||
var seq: RichSequence = null
|
||||
var jsonStr: String = ""
|
||||
var n:Int=0
|
||||
|
||||
var count:Int=0
|
||||
inDf.collect().foreach(row => {
|
||||
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! start parser ^^^" + pathStr)
|
||||
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
sequences = CustomIOTools.IOTools.readGenbankProtein(br, null)
|
||||
|
||||
while (sequences.hasNext) {
|
||||
n += 1
|
||||
count += 1
|
||||
doc = new JSONObject()
|
||||
seq = sequences.nextRichSequence()
|
||||
Process.processSingleSequence(seq,doc)
|
||||
Process.processSingleSequence(seq, doc)
|
||||
|
||||
jsonStr = doc.toString
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
}
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
|
||||
bis.close()
|
||||
bis = null
|
||||
doc = null
|
||||
seq = null
|
||||
jsonStr = ""
|
||||
|
||||
}
|
||||
sequences = null
|
||||
br.close()
|
||||
br = null
|
||||
fdis.close()
|
||||
fdis =null
|
||||
pathStr = null
|
||||
|
||||
}
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis.close()
|
||||
fdos.close()
|
||||
|
||||
out.write(session.read.json(hdfsPathTemporary))
|
||||
hdfsWriter.close()
|
||||
|
||||
println("start parser HDFSjsonFile")
|
||||
val df: DataFrame = spark.read.json(hdfsPathTemporary)
|
||||
out.write(df)
|
||||
}
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] ={
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/microbeGenome").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.io._

import cn.piflow.bundle.microorganism.util.PDB
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
@ -19,105 +19,61 @@ class PDBData extends ConfigurableStop{
|
|||
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
|
||||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
|
||||
|
||||
var cachePath:String = _
|
||||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
|
||||
val session = pec.get[SparkSession]()
|
||||
|
||||
val inDf: DataFrame = in.read()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
var pathStr: String = ""
|
||||
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
var hdfsUrl:String=""
|
||||
try{
|
||||
pathStr =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
}catch {
|
||||
case e:Exception => throw new Exception("Path error")
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
|
||||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/Refseq_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/PDBCache/PDBCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
var fdis: FSDataInputStream =null
|
||||
var br: BufferedReader = null
|
||||
var sequences: RichSequenceIterator = null
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var doc: JSONObject = null
|
||||
var seq: RichSequence = null
|
||||
var pdb: PDB = null
|
||||
var jsonStr: String = ""
|
||||
var n:Int=0
|
||||
var count:Int=0
|
||||
inDf.collect().foreach(row => {
|
||||
count += 1
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
|
||||
pdb = new PDB(pathStr,fs)
|
||||
doc = pdb.getDoc
|
||||
|
||||
jsonStr = doc.toString
|
||||
n +=1
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
}
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
|
||||
bis = null
|
||||
doc = null
|
||||
seq = null
|
||||
jsonStr = ""
|
||||
sequences = null
|
||||
br = null
|
||||
fdis =null
|
||||
pathStr = null
|
||||
pdb = null
|
||||
})
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis.close()
|
||||
fdos.close()
|
||||
hdfsWriter.close()
|
||||
|
||||
val df: DataFrame = session.read.json(hdfsPathTemporary)
|
||||
|
||||
out.write(df)
|
||||
}
|
||||
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/PDB").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
|
|
@ -5,24 +5,30 @@ import java.io.{BufferedReader, InputStreamReader, OutputStreamWriter}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.json.JSONObject


class Pathway extends ConfigurableStop{
override val authorEmail: String = "yangqidong@cnic.cn"
override val description: String = "Parse Pathway data"
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

override def setProperties(map: Map[String, Any]): Unit = {

var cachePath:String = _
def setProperties(map: Map[String, Any]): Unit = {
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
}

override def getPropertyDescriptor(): List[PropertyDescriptor] ={
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
.defaultValue("/pathway").required(true)
descriptor = cachePath :: descriptor
descriptor
}

@ -51,8 +57,8 @@ class Pathway extends ConfigurableStop{
configuration.set("fs.defaultFS",hdfsUrl)
var fs: FileSystem = FileSystem.get(configuration)

// "/biosampleCache/biosampleCache.json"
val hdfsPathTemporary:String = hdfsUrl+"/yqd/test/kegg/Refseq_genomeParser_temporary2.json"

val hdfsPathTemporary = hdfsUrl+cachePath+"/pathwayCache/pathwayCache.json"
val path: Path = new Path(hdfsPathTemporary)
if(fs.exists(path)){
fs.delete(path)

@ -67,17 +73,15 @@ class Pathway extends ConfigurableStop{
|
|||
|
||||
inDf.collect().foreach(row => {
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
println("start parser ^^^" + pathStr)
|
||||
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
var count = 0
|
||||
while (hasAnotherSequence) {
|
||||
count += 1
|
||||
println(count)
|
||||
doc = new JSONObject
|
||||
hasAnotherSequence = util.KeggPathway.process(br, doc)
|
||||
|
||||
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
package cn.piflow.bundle.microorganism

import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, InputStreamReader}
import java.io._

import cn.piflow.bundle.microorganism.util.Pfam
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
@ -20,108 +20,71 @@ class PfamData extends ConfigurableStop{
|
|||
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)
|
||||
|
||||
|
||||
var cachePath:String = _
|
||||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
val session = pec.get[SparkSession]()
|
||||
|
||||
val inDf: DataFrame = in.read()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
var pathStr: String = ""
|
||||
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
var hdfsUrl:String=""
|
||||
try{
|
||||
pathStr =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
}catch {
|
||||
case e:Exception => throw new Exception("Path error")
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
|
||||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/Refseq_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/pfamCache/pfamCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var fdis: FSDataInputStream =null
|
||||
var br: BufferedReader = null
|
||||
var sequences: RichSequenceIterator = null
|
||||
|
||||
var doc: JSONObject = null
|
||||
var seq: RichSequence = null
|
||||
var hasAnotherSequence : Boolean=true
|
||||
var jsonStr: String = ""
|
||||
var n:Int=0
|
||||
|
||||
var count:Int=0
|
||||
inDf.collect().foreach(row => {
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
|
||||
while( hasAnotherSequence && n < 1000 ){
|
||||
n += 1
|
||||
|
||||
while( hasAnotherSequence ){
|
||||
count += 1
|
||||
doc = new JSONObject()
|
||||
hasAnotherSequence = Pfam.process(br,doc)
|
||||
|
||||
jsonStr = doc.toString
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
}
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
|
||||
bis = null
|
||||
doc = null
|
||||
seq = null
|
||||
jsonStr = ""
|
||||
}
|
||||
|
||||
sequences = null
|
||||
br = null
|
||||
fdis =null
|
||||
pathStr = null
|
||||
br.close()
|
||||
fdis.close()
|
||||
})
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis.close()
|
||||
fdos.close()
|
||||
hdfsWriter.close()
|
||||
|
||||
val df: DataFrame = session.read.json(hdfsPathTemporary)
|
||||
|
||||
out.write(df)
|
||||
}
|
||||
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] ={
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/pfam").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.io._

import cn.piflow.bundle.microorganism.util.{CustomIOTools, Process}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
@ -19,15 +19,13 @@ class RefseqData extends ConfigurableStop{
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

var cachePath:String = _
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {

val session = pec.get[SparkSession]()

val inDf: DataFrame = in.read()
val configuration: Configuration = new Configuration()

val configuration: Configuration = new Configuration()
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
val pathARR: Array[String] = pathStr.split("\\/")
var hdfsUrl:String=""

@ -37,88 +35,62 @@ class RefseqData extends ConfigurableStop{
|
|||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/Refseq_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/refseqCache/refseqCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var jsonStr: String =""
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var doc: JSONObject= null
|
||||
var seq: RichSequence = null
|
||||
var br: BufferedReader = null
|
||||
var fdis: FSDataInputStream =null
|
||||
var sequences: RichSequenceIterator = null
|
||||
var count : Int = 0
|
||||
inDf.collect().foreach(row => {
|
||||
|
||||
var n : Int =0
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
sequences = CustomIOTools.IOTools.readGenbankProtein(br, null)
|
||||
|
||||
var fdis: FSDataInputStream = fs.open(new Path(pathStr))
|
||||
while (sequences.hasNext) {
|
||||
count += 1
|
||||
seq = sequences.nextRichSequence()
|
||||
doc = new JSONObject
|
||||
Process.processSingleSequence(seq, doc)
|
||||
|
||||
var br: BufferedReader = new BufferedReader(new InputStreamReader(fdis))
|
||||
|
||||
var sequences: RichSequenceIterator = CustomIOTools.IOTools.readGenbankProtein(br, null)
|
||||
|
||||
while (sequences.hasNext) {
|
||||
n += 1
|
||||
var seq: RichSequence = sequences.nextRichSequence()
|
||||
var doc: JSONObject = new JSONObject
|
||||
Process.processSingleSequence(seq, doc)
|
||||
jsonStr = doc.toString
|
||||
println("start " + n)
|
||||
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
}
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis = null
|
||||
seq = null
|
||||
doc = null
|
||||
}
|
||||
fdis.close()
|
||||
br.close()
|
||||
|
||||
})
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis.close()
|
||||
fdos.close()
|
||||
|
||||
hdfsWriter.close()
|
||||
println("start parser HDFSjsonFile")
|
||||
val df: DataFrame = session.read.json(hdfsPathTemporary)
|
||||
|
||||
out.write(df)
|
||||
|
||||
|
||||
}
|
||||
|
||||
override def setProperties(map: Map[String, Any]): Unit = {
|
||||
|
||||
def setProperties(map: Map[String, Any]): Unit = {
|
||||
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
|
||||
}
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
|
||||
.defaultValue("/refseqData").required(true)
|
||||
descriptor = cachePath :: descriptor
|
||||
descriptor
|
||||
}
|
||||
|
||||
override def getPropertyDescriptor(): List[PropertyDescriptor] ={
|
||||
var descriptor : List[PropertyDescriptor] = List()
|
||||
descriptor
|
||||
}
|
||||
|
||||
override def getIcon(): Array[Byte] = {
|
||||
ImageUtil.getImage("icon/microorganism/RefseqData.png")
|
||||
|
|
|
@ -1,10 +1,10 @@
package cn.piflow.bundle.microorganism

import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, InputStreamReader}
import java.io._

import cn.piflow.bundle.microorganism.util.{CustomIOTools, Process}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.ImageUtil
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, PortEnum, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.hadoop.conf.Configuration
@ -19,12 +19,16 @@ class SwissprotData extends ConfigurableStop{
override val inportList: List[String] =List(PortEnum.DefaultPort.toString)
override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

override def setProperties(map: Map[String, Any]): Unit = {
var cachePath:String = _

def setProperties(map: Map[String, Any]): Unit = {
cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
}

override def getPropertyDescriptor(): List[PropertyDescriptor] ={
override def getPropertyDescriptor(): List[PropertyDescriptor] = {
var descriptor : List[PropertyDescriptor] = List()
val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
.defaultValue("/swissprot").required(true)
descriptor = cachePath :: descriptor
descriptor
}

@ -41,96 +45,58 @@ class SwissprotData extends ConfigurableStop{
|
|||
}
|
||||
|
||||
override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
|
||||
|
||||
val session = pec.get[SparkSession]()
|
||||
|
||||
val inDf: DataFrame = in.read()
|
||||
|
||||
val configuration: Configuration = new Configuration()
|
||||
var pathStr: String = ""
|
||||
var pathStr: String =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
var hdfsUrl:String=""
|
||||
try{
|
||||
pathStr =inDf.take(1)(0).get(0).asInstanceOf[String]
|
||||
val pathARR: Array[String] = pathStr.split("\\/")
|
||||
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
}catch {
|
||||
case e:Exception => throw new Exception("Path error")
|
||||
for (x <- (0 until 3)){
|
||||
hdfsUrl+=(pathARR(x) +"/")
|
||||
}
|
||||
|
||||
configuration.set("fs.defaultFS",hdfsUrl)
|
||||
var fs: FileSystem = FileSystem.get(configuration)
|
||||
|
||||
val hdfsPathTemporary:String = hdfsUrl+"/Refseq_genomeParser_temporary.json"
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
val hdfsPathTemporary = hdfsUrl+cachePath+"/swissprotCache/swissprotCache.json"
|
||||
|
||||
val path: Path = new Path(hdfsPathTemporary)
|
||||
if(fs.exists(path)){
|
||||
fs.delete(path)
|
||||
}
|
||||
|
||||
fs.create(path).close()
|
||||
var fdos: FSDataOutputStream = fs.append(path)
|
||||
val buff: Array[Byte] = new Array[Byte](1048576)
|
||||
|
||||
var bis: BufferedInputStream =null
|
||||
val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))
|
||||
|
||||
var fdis: FSDataInputStream =null
|
||||
var br: BufferedReader = null
|
||||
var sequences: RichSequenceIterator = null
|
||||
var doc: JSONObject = null
|
||||
var seq: RichSequence = null
|
||||
var jsonStr: String = ""
|
||||
var n:Int=0
|
||||
var count:Int=0
|
||||
inDf.collect().foreach(row => {
|
||||
pathStr = row.get(0).asInstanceOf[String]
|
||||
fdis = fs.open(new Path(pathStr))
|
||||
br = new BufferedReader(new InputStreamReader(fdis))
|
||||
sequences = CustomIOTools.IOTools.readUniProt(br,null)
|
||||
|
||||
while (sequences.hasNext) {
|
||||
count += 1
|
||||
if (count < 100) {
|
||||
doc = new JSONObject()
|
||||
seq = sequences.nextRichSequence()
|
||||
Process.processUniprotSeq(seq, doc)
|
||||
|
||||
n += 1
|
||||
doc.write(hdfsWriter)
|
||||
hdfsWriter.write("\n")
|
||||
|
||||
doc = new JSONObject()
|
||||
seq = sequences.nextRichSequence()
|
||||
Process.processUniprotSeq(seq,doc)
|
||||
jsonStr = doc.toString
|
||||
println("start " + n + "String\\\n" )
|
||||
|
||||
if (n == 1) {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("[" + jsonStr).getBytes()))
|
||||
} else {
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("," + jsonStr).getBytes()))
|
||||
seq = null
|
||||
}
|
||||
}
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
br.close()
|
||||
fdis.close()
|
||||
|
||||
bis = null
|
||||
doc = null
|
||||
seq = null
|
||||
jsonStr = ""
|
||||
}
|
||||
sequences = null
|
||||
br = null
|
||||
fdis =null
|
||||
pathStr = null
|
||||
})
|
||||
bis = new BufferedInputStream(new ByteArrayInputStream(("]").getBytes()))
|
||||
|
||||
var count: Int = bis.read(buff)
|
||||
while (count != -1) {
|
||||
fdos.write(buff, 0, count)
|
||||
fdos.flush()
|
||||
count = bis.read(buff)
|
||||
}
|
||||
fdos.flush()
|
||||
bis.close()
|
||||
fdos.close()
|
||||
hdfsWriter.close()
|
||||
|
||||
val df: DataFrame = session.read.json(hdfsPathTemporary)
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ class FlowBean {
var checkpoint : String = _
var checkpointParentProcessId : String = _
var runMode : String = _
var showData : String = _
var stops : List[StopBean] = List()
var paths : List[PathBean] = List()

@ -25,6 +26,7 @@ class FlowBean {
this.checkpoint = flowMap.getOrElse("checkpoint","").asInstanceOf[String]
this.checkpointParentProcessId = flowMap.getOrElse("checkpointParentProcessId", "").asInstanceOf[String]
this.runMode = flowMap.getOrElse("runMode","RUN").asInstanceOf[String]
this.showData = flowMap.getOrElse("showData","0").asInstanceOf[String]

//construct StopBean List
val stopsList = MapUtil.get(flowMap,"stops").asInstanceOf[List[Map[String, Any]]]

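For context, a flow definition map carrying the new checkpointParentProcessId field would look roughly like the following; the concrete values here are illustrative only, not taken from the repository:

    // Illustrative flow map as FlowBean.init would consume it.
    object FlowMapExample {
      val flowMap: Map[String, Any] = Map(
        "name"                      -> "example_flow",
        "checkpoint"                -> "Stop2",
        "checkpointParentProcessId" -> "process_0000-0000",   // hypothetical parent process id
        "runMode"                   -> "RUN",
        "showData"                  -> "0",
        "stops"                     -> List.empty[Map[String, Any]],
        "paths"                     -> List.empty[Map[String, Any]]
      )
    }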
@ -378,13 +378,12 @@ class JobOutputStreamImpl() extends JobOutputStream with Logging {

def getDataFrame(port: String) = mapDataFrame(port);

def showData() = {
def showData(count:Int) = {

mapDataFrame.foreach(en => {
val portName = if(en._1.equals("")) "default" else en._1
println(portName + " port: ")

en._2.apply().show(PropertyUtil.getPropertyValue("data.show").toInt)
en._2.apply().show(count)

})
}

@ -484,7 +483,10 @@ class ProcessImpl(flow: Flow, runnerContext: Context, runner: Runner, parentProc
runnerListener.onJobCompleted(pe.getContext());

//show data in log
outputs.showData()
val showDataCount = PropertyUtil.getPropertyValue("data.show").toInt
if(showDataCount > 0) {
outputs.showData(showDataCount)
}
}
catch {
case e: Throwable =>

@ -528,7 +530,10 @@ class ProcessImpl(flow: Flow, runnerContext: Context, runner: Runner, parentProc
}
}
//show data in log
outputs.showData()
val showDataCount = PropertyUtil.getPropertyValue("data.show").toInt
if(showDataCount > 0) {
outputs.showData(showDataCount)
}

//save data in debug mode
if(flow.getRunMode() == FlowRunMode.DEBUG) {

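A defensive variant of the caller shown above, assuming PropertyUtil.getPropertyValue returns a String (or null when data.show is unset); calling toInt directly would throw on a missing or non-numeric value, so this sketch falls back to 0 and keeps the preview disabled:

    // Guarded read of data.show before invoking the new showData(count).
    val showDataCount =
      Option(PropertyUtil.getPropertyValue("data.show"))
        .flatMap(v => scala.util.Try(v.trim.toInt).toOption)
        .getOrElse(0)
    if (showDataCount > 0) {
      outputs.showData(showDataCount)   // prints each output port and its first N rows
    }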
@ -10,6 +10,12 @@
<modelVersion>4.0.0</modelVersion>

<artifactId>piflow-server</artifactId>

<properties>
<akka.version>2.4.14</akka.version>
<akka.http.version>10.0.0</akka.http.version>
</properties>

<dependencies>
<dependency>
<groupId>piflow</groupId>

@ -30,40 +36,31 @@
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-remote_2.11</artifactId>
<version>2.3.14</version>
<version>${akka.version}</version>
</dependency>

<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_2.11</artifactId>
<version>2.3.14</version>
<version>${akka.version}</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-slf4j_2.11</artifactId>
<version>2.3.14</version>
<version>${akka.version}</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-stream-experimental_2.11</artifactId>
<version>2.0.4</version>
<artifactId>akka-http_2.11</artifactId>
<version>${akka.http.version}</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-http-core-experimental_2.11</artifactId>
<version>2.0.4</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-http-experimental_2.11</artifactId>
<version>2.0.4</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-http-spray-json-experimental_2.11</artifactId>
<version>2.0.4</version>
<artifactId>akka-http-spray-json_2.11</artifactId>
<version>${akka.http.version}</version>
</dependency>

</dependencies>

@ -0,0 +1,10 @@
PiFlowHTTPService{
shutdown-timeout:300s
}

akka {
http {
idle-timeout = 600 s
request-timeout = 200 s
}
}

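The new configuration file is standard HOCON, so the values can be read back through Typesafe Config; a small sketch using only the keys the added file defines (the Duration conversion is the regular Config API):

    // Reading the new timeouts from application.conf.
    import com.typesafe.config.ConfigFactory
    import java.util.concurrent.TimeUnit

    object ConfigSketch extends App {
      val config = ConfigFactory.load()
      val shutdownTimeoutSec = config.getDuration("PiFlowHTTPService.shutdown-timeout", TimeUnit.SECONDS)
      val requestTimeoutSec  = config.getDuration("akka.http.request-timeout", TimeUnit.SECONDS)
      println(s"shutdown-timeout=${shutdownTimeoutSec}s, request-timeout=${requestTimeoutSec}s")
    }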
@ -19,7 +19,8 @@ import spray.json.DefaultJsonProtocol

object HTTPService extends DefaultJsonProtocol with Directives with SprayJsonSupport{
implicit val system = ActorSystem("HTTPService", ConfigFactory.load())
implicit val config = ConfigFactory.load()
implicit val system = ActorSystem("PiFlowHTTPService", config)
implicit val materializer = ActorMaterializer()
implicit val executionContext = system.dispatcher
var processMap = Map[String, SparkAppHandle]()

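With the pom moved to akka-http 10.0.0, the implicit system and materializer declared above are exactly what a route binding needs; a minimal sketch of how a route could be served (assumed akka-http 10.0 API, and the port number here is purely illustrative, not the service's actual port):

    // Minimal akka-http bind sketch, relying on the implicits defined in HTTPService.
    import akka.http.scaladsl.Http
    import akka.http.scaladsl.server.Directives._

    val route = path("health") { get { complete("ok") } }
    val binding = Http().bindAndHandle(route, "0.0.0.0", 8001)   // hypothetical interface/port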