From 2d1685af1af9f8139f4a18467b9b88f24830eed9 Mon Sep 17 00:00:00 2001 From: yanfqidong0604 Date: Fri, 21 Dec 2018 12:20:58 +0800 Subject: [PATCH 1/3] Analysis of microbial data EMBL, Refseq_genome, Refseq_protein and related components. FTP server download, HDFS file screening, HDFS file decompression optimization. QiDong Yang --- .../resources/microorganism/EMBL_Logo.svg | 76 + .../main/resources/microorganism/down.json | 31 + .../resources/microorganism/embl_parser.json | 67 + .../main/resources/microorganism/refseq.png | Bin 0 -> 1561 bytes .../microorganism/refseq_genome.json | 67 + .../resources/microorganism/select_unzip.json | 37 + .../piflow/bundle/ftp/LoadFromFtpToHDFS.scala | 141 ++ .../piflow/bundle/ftp/SelectFilesByName.scala | 109 ++ .../piflow/bundle/http/UnzipFilesOnHDFS.scala | 219 ++- .../bundle/microorganism/EmblParser.scala | 168 +++ .../bundle/microorganism/RefseqParser.scala | 168 +++ .../microorganism/util/CustomEMBLFormat.java | 1151 +++++++++++++++ .../util/CustomEnsemblFormat.java | 1133 +++++++++++++++ .../microorganism/util/CustomIOTools.java | 20 +- .../util/CustomUniProtFormat.java | 1291 +++++++++++++++++ .../bundle/microorganism/util/ProcessNew.java | 571 ++++++++ .../scala/cn/piflow/bundle/ftp/emblTest.scala | 87 ++ 17 files changed, 5198 insertions(+), 138 deletions(-) create mode 100644 piflow-bundle/src/main/resources/microorganism/EMBL_Logo.svg create mode 100644 piflow-bundle/src/main/resources/microorganism/down.json create mode 100644 piflow-bundle/src/main/resources/microorganism/embl_parser.json create mode 100644 piflow-bundle/src/main/resources/microorganism/refseq.png create mode 100644 piflow-bundle/src/main/resources/microorganism/refseq_genome.json create mode 100644 piflow-bundle/src/main/resources/microorganism/select_unzip.json create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/LoadFromFtpToHDFS.scala create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/SelectFilesByName.scala 
create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/EmblParser.scala create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/RefseqParser.scala create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomEMBLFormat.java create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomEnsemblFormat.java create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomUniProtFormat.java create mode 100644 piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/ProcessNew.java create mode 100644 piflow-bundle/src/test/scala/cn/piflow/bundle/ftp/emblTest.scala diff --git a/piflow-bundle/src/main/resources/microorganism/EMBL_Logo.svg b/piflow-bundle/src/main/resources/microorganism/EMBL_Logo.svg new file mode 100644 index 0000000..20959c9 --- /dev/null +++ b/piflow-bundle/src/main/resources/microorganism/EMBL_Logo.svg @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/piflow-bundle/src/main/resources/microorganism/down.json b/piflow-bundle/src/main/resources/microorganism/down.json new file mode 100644 index 0000000..8c60db1 --- /dev/null +++ b/piflow-bundle/src/main/resources/microorganism/down.json @@ -0,0 +1,31 @@ +{ + "flow":{ + "name":"test", + "uuid":"1234", + "stops":[ + { + "uuid":"1111", + "name":"LoadFromFtpToHDFS", + "bundle":"cn.piflow.bundle.ftp.LoadFromFtpToHDFS", + "properties":{ + "url_str":"ftp.ebi.ac.uk", + "port":"", + "username":"", + "password":"", + "ftpFile":"/pub/databases/ena/sequence/release/con/rel_con_env_07_r138.dat.gz", + "HDFSUrl":"hdfs://10.0.88.70:9000", + "HDFSPath":"/yqd/weishengwu/embl/", + "isFile":"true" + } + } + ], + "paths":[ + { + "from":"", + "outport":"", + "inport":"", + "to":"" + } + ] + } +} \ No newline at end of file diff --git 
a/piflow-bundle/src/main/resources/microorganism/embl_parser.json b/piflow-bundle/src/main/resources/microorganism/embl_parser.json new file mode 100644 index 0000000..dc40445 --- /dev/null +++ b/piflow-bundle/src/main/resources/microorganism/embl_parser.json @@ -0,0 +1,67 @@ +{ + "flow":{ + "name":"test", + "uuid":"1234", + "stops":[ + + { + "uuid":"1111", + "name":"SelectFilesByName", + "bundle":"cn.piflow.bundle.ftp.SelectFilesByName", + "properties":{ + "HDFSUrl":"hdfs://10.0.88.70:9000", + "HDFSPath":"/yqd/weishengwu/embl", + "selectionConditions":".*con_pro_02_r138.dat.gz,.*con_vrl_01_r138.dat.gz,.*pat_phg_01_r138.dat.gz" + } + },{ + "uuid":"2222", + "name":"UnzipFilesOnHDFS_1", + "bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS_1", + "properties":{ + "isCustomize":"false", + "filePath":"", + "fileType":"gz", + "unzipPath":"" + + } + }, + { + "uuid":"3333", + "name":"EmblParser", + "bundle":"cn.piflow.bundle.microorganism.EmblParser", + "properties":{ + } + },{ + "uuid":"4444", + "name":"PutEs", + "bundle":"cn.piflow.bundle.es.PutEs", + "properties":{ + "es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72", + "port": "9200", + "es_index": "embl", + "es_type": "embl" + } + } + ], + "paths":[ + { + "from":"SelectFilesByName", + "outport":"", + "inport":"", + "to":"UnzipFilesOnHDFS_1" + }, + { + "from":"UnzipFilesOnHDFS_1", + "outport":"", + "inport":"", + "to":"EmblParser" + }, + { + "from":"EmblParser", + "outport":"", + "inport":"", + "to":"PutEs" + } + ] + } +} \ No newline at end of file diff --git a/piflow-bundle/src/main/resources/microorganism/refseq.png b/piflow-bundle/src/main/resources/microorganism/refseq.png new file mode 100644 index 0000000000000000000000000000000000000000..5448ab907620939434f07a5da5a35f4304f40297 GIT binary patch literal 1561 zcmV+!2Il#RP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D1+Ga%K~#8N?OGwz zt2z+nUm%f{l}IEKi9{li`~)Nt$;>m%Q_NFHBobMPL?V&rZGb>%1GL}$dT&2&&diFE zG;H={H@n+vP&fq?ktcy7@+441o&<`>lRy!95-1{10!8FWa4I6ZTaL)HaM3krng+uT 
zP&Cuv$?-CgRWusuSae)02Y#HPYWwNi??aP&FmxAL)1!{fMGwCKe|y&yNPGvTvFK@- z792lDUNZ?g4RnIrM_zDDBE z_voEwn1fz7f*shY9CF-oe~w)Fr|}jd`=&xV)N!o4;v=aJJdwPH)qyOmJ({CK?v3x! z!}7uK$3zmyps{^V<|KxVAieYW6*?3?`|cH?>?F+|Z(}ZF8G5qMKr-naJnBsoPzH-m zA3~%|E{Jy1M2;o=1{?F2d(OvwbbgA+0Ud66Bs3}R#BdNx4qRV`!-3o`ohpRW8do7a zF5RU&f<*{d#Ma)Q#kYGT@3IWf%@~J~|ELaRm^b`b^gmA|M~8ZoP#x~7X~4X38ry6X zw8)%kj+>93-4*cpntREUK;*CXA>I&Lmre>3lPte?*2Jvx8M)u^>qK&NXo?u_oLF)3 zkk5{}c-vFLNvp#7nJ#ZPjreRL8R$-$?gvdXNy%+0zfQ!6U;X|Na#OKytSzm?R%kK4 zU}>>--o5)ipGd;o8Y3}je1pps5nFf34t9xTMCjpuM`yq~@t8>yi46Z%A_;T_{@^O^C^kwDg$pbBHAH8j*=EGloF+bwA5(GY?*|revSU9PTjo zL|2I)sJl?y75?n0-jW49g4K-<*I)S6XD_sW9+7*ewMQ*vv9f-rShCp3ZF+i#CWC9d zT1N^x>Netf%c7KNvwsIj6SD>x?+od|#y-bP-4r7VJ491U&Loy(Z|>=kjJp3RBH2Fx zia_XLQvH=wXy%{Z??9(Dyz;rM-4-CL*X^-}HZ_u|!y4LIe6&YfCGcz_@%{vK6=I>6 z`>2C>*q9h_;zC%PZRLg{vZ1r~Te)!>#mN}FNoIhHtZ7cz)nmRB8Ijk4AlA5!eFv6x z5%d_F@t0+C;7hKNu^{wee=%2ipGprWMdFz4EUBJ#IG_}NP)&vctipY~d5qT24 zMC3pJ{-@+aMS0{&polyP6p<%^BJw0qM4kkS$df=3c@iihPXXY6JtvUg5V=`^00000 LNkvXXu0mjfPAK9j literal 0 HcmV?d00001 diff --git a/piflow-bundle/src/main/resources/microorganism/refseq_genome.json b/piflow-bundle/src/main/resources/microorganism/refseq_genome.json new file mode 100644 index 0000000..98f997a --- /dev/null +++ b/piflow-bundle/src/main/resources/microorganism/refseq_genome.json @@ -0,0 +1,67 @@ +{ + "flow":{ + "name":"test", + "uuid":"1234", + "stops":[ + + { + "uuid":"1111", + "name":"SelectFilesByName", + "bundle":"cn.piflow.bundle.ftp.SelectFilesByName", + "properties":{ + "HDFSUrl":"hdfs://10.0.88.70:9000", + "HDFSPath":"/yqd/weishengwu/refseq/", + "selectionConditions":".*genomic.gbff.gz" + } + },{ + "uuid":"2222", + "name":"UnzipFilesOnHDFS_1", + "bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS_1", + "properties":{ + "isCustomize":"false", + "filePath":"", + "fileType":"gz", + "unzipPath":"" + + } + }, + { + "uuid":"3333", + "name":"Refseq_genomeParser", + "bundle":"cn.piflow.bundle.microorganism.Refseq_genomeParser", + "properties":{ + } + 
},{ + "uuid":"4444", + "name":"PutEs", + "bundle":"cn.piflow.bundle.es.PutEs", + "properties":{ + "es_nodes": "10.0.88.70,10.0.88.71,10.0.88.72", + "port": "9200", + "es_index": "genome", + "es_type": "archaea" + } + } + ], + "paths":[ + { + "from":"SelectFilesByName", + "outport":"", + "inport":"", + "to":"UnzipFilesOnHDFS_1" + }, + { + "from":"UnzipFilesOnHDFS_1", + "outport":"", + "inport":"", + "to":"Refseq_genomeParser" + }, + { + "from":"Refseq_genomeParser", + "outport":"", + "inport":"", + "to":"PutEs" + } + ] + } +} \ No newline at end of file diff --git a/piflow-bundle/src/main/resources/microorganism/select_unzip.json b/piflow-bundle/src/main/resources/microorganism/select_unzip.json new file mode 100644 index 0000000..29c65d2 --- /dev/null +++ b/piflow-bundle/src/main/resources/microorganism/select_unzip.json @@ -0,0 +1,37 @@ +{ + "flow":{ + "name":"test", + "uuid":"1234", + "stops":[ + { + "uuid":"0000", + "name":"SelectFilesByName", + "bundle":"cn.piflow.bundle.ftp.SelectFilesByName", + "properties":{ + "HDFSUrl":"hdfs://10.0.88.70:9000", + "HDFSPath":"/yqd/", + "selectionConditions":".*genomic.gbff.gz" + } + },{ + "uuid":"1111", + "name":"UnzipFilesOnHDFS_1", + "bundle":"cn.piflow.bundle.http.UnzipFilesOnHDFS_1", + "properties":{ + "isCustomize":"true", + "filePath":"hdfs://10.0.88.70:9000/yqd/archaea.1.genomic.gbff.gz", + "fileType":"gz", + "unzipPath":"hdfs://10.0.88.70:9000/yqd/weishengwu/" + + } + } + ], + "paths":[ + { + "from":"SelectFilesByName", + "outport":"", + "inport":"", + "to":"UnzipFilesOnHDFS_1" + } + ] + } +} \ No newline at end of file diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/LoadFromFtpToHDFS.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/LoadFromFtpToHDFS.scala new file mode 100644 index 0000000..211ba74 --- /dev/null +++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/LoadFromFtpToHDFS.scala @@ -0,0 +1,141 @@ +package cn.piflow.bundle.ftp + +import cn.piflow.conf.bean.PropertyDescriptor 
+import cn.piflow.conf.util.{ImageUtil, MapUtil} +import cn.piflow.conf.{ConfigurableStop, PortEnum} +import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} +import org.apache.commons.net.ftp.{FTP, FTPClient, FTPClientConfig, FTPFile} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} + +class LoadFromFtpToHDFS extends ConfigurableStop { + override val authorEmail: String = "yangqidong@cnic.cn" + override val description: String = "Load file from ftp server save on HDFS" + override val inportList: List[String] = List(PortEnum.NonePort.toString) + override val outportList: List[String] = List(PortEnum.NonePort.toString) + + var url_str:String =_ + var port:String=_ + var username:String=_ + var password:String=_ + var ftpFile:String=_ + var HDFSUrl:String=_ + var HDFSPath:String=_ + var isFile:String=_ + + var fs: FileSystem=null + var con: FTPClientConfig =null + + def downFile(ftp: FTPClient,ftpFilePath:String,HDFSSavePath:String): Unit = { + + val changeFlag: Boolean = ftp.changeWorkingDirectory(ftpFilePath) + val files: Array[FTPFile] = ftp.listFiles() + for(x <- files ) { + if (x.isFile) { + println("down start ^^^ "+x.getName) + val hdfsPath: Path = new Path(HDFSSavePath + x.getName) + if(! 
fs.exists(hdfsPath)){ + var fdos: FSDataOutputStream = fs.create(hdfsPath) + ftp.retrieveFile(new String(x.getName.getBytes("GBK"),"ISO-8859-1"), fdos) + fdos.close() + } + } else { + downFile(ftp,ftpFilePath+x.getName+"/",HDFSSavePath+x.getName+"/") + } + } + + } + + + override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { + + val configuration: Configuration = new Configuration() + configuration.set("fs.defaultFS", HDFSUrl) + fs = FileSystem.get(configuration) + + val ftp:FTPClient = openFtpClient() + + if(isFile.equals("true")){ + val pathArr: Array[String] = ftpFile.split("/") + var dirPath:String="" + for(x <- (0 until pathArr.length-1)){ + dirPath += (pathArr(x)+"/") + } + ftp.changeWorkingDirectory(dirPath) + + var fdos: FSDataOutputStream = fs.create(new Path(HDFSPath+pathArr.last)) + ftp.retrieveFile(new String(pathArr.last.getBytes("GBK"),"ISO-8859-1"), fdos) + fdos.flush() + fdos.close() + }else{ + downFile(ftp,ftpFile,HDFSPath) + } + } + + def openFtpClient(): FTPClient = { + val ftp = new FTPClient + if(port.length > 0 ){ + ftp.connect(url_str,port.toInt) + }else{ + ftp.connect(url_str) + } + if(username.length > 0 && password.length > 0){ + ftp.login(username,password) + }else{ + ftp.login("anonymous", "121@hotmail.com") + } + ftp.setControlEncoding("GBK") + con = new FTPClientConfig(FTPClientConfig.SYST_NT) + con.setServerLanguageCode("zh") + ftp.setFileType(FTP.BINARY_FILE_TYPE) + ftp + } + + + override def setProperties(map: Map[String, Any]): Unit = { + url_str=MapUtil.get(map,key="url_str").asInstanceOf[String] + port=MapUtil.get(map,key="port").asInstanceOf[String] + username=MapUtil.get(map,key="username").asInstanceOf[String] + password=MapUtil.get(map,key="password").asInstanceOf[String] + ftpFile=MapUtil.get(map,key="ftpFile").asInstanceOf[String] + HDFSUrl=MapUtil.get(map,key="HDFSUrl").asInstanceOf[String] + HDFSPath=MapUtil.get(map,key="HDFSPath").asInstanceOf[String] + 
isFile=MapUtil.get(map,key="isFile").asInstanceOf[String] + } + + + override def getPropertyDescriptor(): List[PropertyDescriptor] = { + var descriptor : List[PropertyDescriptor] = List() + val url_str = new PropertyDescriptor().name("url_str").displayName("URL").defaultValue("IP of FTP server, such as 128.136.0.1 or ftp.ei.addfc.gak").required(true) + val port = new PropertyDescriptor().name("port").displayName("PORT").defaultValue("Port of FTP server").required(false) + val username = new PropertyDescriptor().name("username").displayName("USER_NAME").defaultValue("").required(false) + val password = new PropertyDescriptor().name("password").displayName("PASSWORD").defaultValue("").required(false) + val ftpFile = new PropertyDescriptor().name("ftpFile").displayName("FTP_File").defaultValue("The path of the file to the FTP server, such as /test/Ab/ or /test/Ab/test.txt").required(true) + val HDFSUrl = new PropertyDescriptor().name("HDFSUrl").displayName("HDFSUrl").defaultValue("The URL of the HDFS file system, such as hdfs://10.0.88.70:9000").required(true) + val HDFSPath = new PropertyDescriptor().name("HDFSPath").displayName("HDFSPath").defaultValue("The save path of the HDFS file system, such as /test/Ab/").required(true) + val isFile = new PropertyDescriptor().name("isFile").displayName("isFile").defaultValue("Whether the path is a file or not, if true is filled in, only a single file specified by the path is downloaded. 
If false is filled in, all files under the folder are downloaded recursively.").required(true) + descriptor = isFile :: descriptor + descriptor = url_str :: descriptor + descriptor = port :: descriptor + descriptor = username :: descriptor + descriptor = password :: descriptor + descriptor = ftpFile :: descriptor + descriptor = HDFSUrl :: descriptor + descriptor = HDFSPath :: descriptor + descriptor +} + + override def getIcon(): Array[Byte] = { + ImageUtil.getImage("ftp.png") + } + + override def getGroup(): List[String] = { + List(StopGroupEnum.FtpGroup.toString) + } + + override def initialize(ctx: ProcessContext): Unit = { + + } + + +} diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/SelectFilesByName.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/SelectFilesByName.scala new file mode 100644 index 0000000..d85a045 --- /dev/null +++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/ftp/SelectFilesByName.scala @@ -0,0 +1,109 @@ +package cn.piflow.bundle.ftp + +import java.util.regex.Pattern + +import cn.piflow.conf.bean.PropertyDescriptor +import cn.piflow.conf.util.{ImageUtil, MapUtil} +import cn.piflow.conf.{ConfigurableStop, PortEnum} +import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.{DataFrame, Row, SparkSession} + +import scala.collection.mutable.ArrayBuffer + +class SelectFilesByName extends ConfigurableStop{ + override val authorEmail: String = "yangqidong@cnic.cn" + override val description: String = "Selecting files by file name" + override val inportList: List[String] = List(PortEnum.NonePort.toString) + override val outportList: List[String] = List(PortEnum.DefaultPort.toString) + + var HDFSUrl:String=_ + var HDFSPath:String=_ + var selectionConditions:String =_ + 
+ var fs: FileSystem=null + var pathARR:ArrayBuffer[String]=ArrayBuffer() + var selectArr:Array[String]=null + + def selectFile(path: String): Unit = { + val statusesARR: Array[FileStatus] = fs.listStatus(new Path(path)) + for(each <- statusesARR){ + val pathStr = each.getPath.toString + if(each.isFile){ + val fileName: String = pathStr.split("/").last + selectArr = selectionConditions.split(",") + var b: Boolean =false + for(x <- selectArr){ + b = Pattern.matches(x,fileName) + if(b){ + pathARR += pathStr + } + } + }else{ + selectFile(pathStr) + } + } + } + + override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { + + val session: SparkSession = pec.get[SparkSession]() + + val configuration: Configuration = new Configuration() + configuration.set("fs.defaultFS", HDFSUrl) + fs = FileSystem.get(configuration) + + selectFile(HDFSPath) + + val rows: List[Row] = pathARR.map(each => { + var arr:Array[String]=Array(each) + val row: Row = Row.fromSeq(arr) + row + }).toList + val rowRDD: RDD[Row] = session.sparkContext.makeRDD(rows) + val fields: Array[StructField] = "path".split("/").map(d=>StructField(d,StringType,nullable = true)) + val schema: StructType = StructType(fields) + val df: DataFrame = session.createDataFrame(rowRDD,schema) + + + println("#################################################") + df.show(20) + println("#################################################") + + out.write(df) + } + + override def setProperties(map: Map[String, Any]): Unit = { + HDFSUrl=MapUtil.get(map,key="HDFSUrl").asInstanceOf[String] + HDFSPath=MapUtil.get(map,key="HDFSPath").asInstanceOf[String] + selectionConditions=MapUtil.get(map,key="selectionConditions").asInstanceOf[String] + } + + + override def getPropertyDescriptor(): List[PropertyDescriptor] = { + var descriptor : List[PropertyDescriptor] = List() + val HDFSUrl = new PropertyDescriptor().name("HDFSUrl").displayName("HDFSUrl").defaultValue("The URL of the HDFS file system, such as 
hdfs://10.0.88.70:9000").required(true) + val HDFSPath = new PropertyDescriptor().name("HDFSPath").displayName("HDFSPath").defaultValue("The save path of the HDFS file system, such as /test/Ab").required(true) + val selectionConditions = new PropertyDescriptor().name("selectionConditions").displayName("selectionConditions").defaultValue("To select conditions, you need to fill in regular expressions in java, such as. * abc. *").required(true) + descriptor = HDFSUrl :: descriptor + descriptor = HDFSPath :: descriptor + descriptor = selectionConditions :: descriptor + descriptor + } + + override def getIcon(): Array[Byte] = { + ImageUtil.getImage("ftp.png") + } + + override def getGroup(): List[String] = { + List(StopGroupEnum.FtpGroup.toString) + } + + override def initialize(ctx: ProcessContext): Unit = { + + } + +} diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnzipFilesOnHDFS.scala b/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnzipFilesOnHDFS.scala index 85d0eac..e3f7719 100644 --- a/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnzipFilesOnHDFS.scala +++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/http/UnzipFilesOnHDFS.scala @@ -12,178 +12,122 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import scala.collection.mutable.ArrayBuffer + class UnzipFilesOnHDFS extends ConfigurableStop { val authorEmail: String = "yangqidong@cnic.cn" val description: String = "Unzip files on HDFS" - val inportList: List[String] = List(PortEnum.NonePort.toString) + val inportList: List[String] = List(PortEnum.DefaultPort.toString) val outportList: List[String] = List(PortEnum.DefaultPort.toString) + var isCustomize:String=_ var filePath:String=_ var fileType:String=_ var unzipPath:String=_ - def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { - val session: SparkSession = pec.get[SparkSession]() + var 
session: SparkSession = null + + def unzipFile(hdfsFilePath: String, zipFileType: String, unzipHdfsPath: String):String = { + var zft: String = "" + if(zipFileType.length < 1){ + zft = hdfsFilePath.split("\\.").last + }else{ + zft = zipFileType + } val configuration: Configuration = new Configuration() - val pathARR: Array[String] = filePath.split("\\/") + val pathARR: Array[String] = hdfsFilePath.split("\\/") var hdfsUrl:String="" for (x <- (0 until 3)){ hdfsUrl+=(pathARR(x) +"/") } configuration.set("fs.defaultFS",hdfsUrl) - // configuration.set("dfs.nameservices", "nameservice1") - // configuration.set("dfs.ha.namenodes.nameservice1", "nn1,nn2"); - // configuration.set("dfs.namenode.rpc-address.nameservice1.nn1", "xxx:8020"); - // configuration.set("dfs.namenode.rpc-address.nameservice1.nn2", "xxx:8020"); - // configuration.set("dfs.client.failover.proxy.provider.nameservice1" - // ,"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"); - // configuration.addResource("classpath:/hadoop/core-site.xml"); - // configuration.addResource("classpath:/hadoop/hdfs-site.xml"); - // configuration.addResource("classpath:/hadoop/mapred-site.xml"); + var uhp : String="" + if(unzipHdfsPath.length < 1){ + for (x <- (0 until pathARR.length-1)){ + uhp+=(pathARR(x) +"/") + } + }else{ + uhp=unzipHdfsPath + } val fs = FileSystem.get(configuration) - val fdis: FSDataInputStream = fs.open(new Path(filePath)) - - - val filePathArr: Array[String] = filePath.split("/") + val fdis: FSDataInputStream = fs.open(new Path(hdfsFilePath)) + val filePathArr: Array[String] = hdfsFilePath.split("/") var fileName: String = filePathArr.last if(fileName.length == 0){ fileName = filePathArr(filePathArr.size-2) } - if(fileType.equals("gz")){ + var savePath:String="" + if(zft.equals("gz")){ val gzip: GZIPInputStream = new GZIPInputStream(fdis) var n = -1 val buf=new Array[Byte](10*1024*1024) - val savePath = new Path(unzipPath +fileName.replace(".gz","")) - val fdos = 
fs.create(savePath) + savePath = uhp +fileName.replace(".gz","") + val path = new Path(savePath) + val fdos = fs.create(path) while((n=gzip.read(buf)) != -1 && n != -1){ fdos.write(buf,0,n) fdos.flush() } - - - }/*else if(fileType.equals("tar")){ - - var entryNum:Int=0 - var entryFileName:String=null - var entryFile:File=null - var subEntryFile:File=null - var subEntryFileName:String=null - var tarArchiveEntries:Array[TarArchiveEntry]=null - var fileList:List[String]=List() - var fos:FileOutputStream=null - - var entry: TarArchiveEntry = null - val tarIs: TarArchiveInputStream = new TarArchiveInputStream(fdis) - while ((entry = tarIs.getNextTarEntry) != null && entry != null) { - entryFileName= localPath +File.separator+entry.getName() - entryFile=new File(entryFileName) - entryNum += 1 - if(entry.isDirectory()){ - if(!entryFile.exists()){ - entryFile.mkdirs() - } - tarArchiveEntries=entry.getDirectoryEntries() - for(i<-0 until tarArchiveEntries.length){ - subEntryFileName=entryFileName+File.separator+tarArchiveEntries(i).getName() - subEntryFile=new File(subEntryFileName) - fileList=subEntryFileName::fileList - fos=new FileOutputStream(subEntryFile) - var mark = -1 - val buf=new Array[Byte](4*1024) - while((mark=tarIs.read(buf)) != -1 && mark != -1){ - fos.write(buf,0,mark) - } - fos.close() - fos=null - } - }else{ - fileList = entryFileName :: fileList - fos=new FileOutputStream(entryFile) - var mark = -1 - val buf=new Array[Byte](4*1024) - while((mark=tarIs.read(buf)) != -1 && mark != -1){ - fos.write(buf,0,mark) - } - fos.close() - fos=null - } - - } - if(entryNum==0){ - println("there is no file!") - } - - }else if(fileType.equals("tar.gz")){ - - var entryNum:Int=0 - var entryFileName:String=null - var entryFile:File=null - var subEntryFile:File=null - var subEntryFileName:String=null - var tarArchiveEntries:Array[TarArchiveEntry]=null - var fileList:List[String]=List() - var fos:FileOutputStream=null - - var entry: TarArchiveEntry = null - val 
gzip:GZIPInputStream=new GZIPInputStream(fdis) - val tarIs: TarArchiveInputStream = new TarArchiveInputStream(gzip) - while ((entry = tarIs.getNextTarEntry) != null && entry != null) { - entryFileName=localPath +File.separator+entry.getName() - entryFile=new File(entryFileName) - entryNum += 1 - if(entry.isDirectory()){ - if(!entryFile.exists()){ - entryFile.mkdirs() - } - tarArchiveEntries=entry.getDirectoryEntries() - for(i<-0 until tarArchiveEntries.length){ - subEntryFileName=entryFileName+File.separator+tarArchiveEntries(i).getName() - subEntryFile=new File(subEntryFileName) - fileList=subEntryFileName::fileList - fos=new FileOutputStream(subEntryFile) - var mark = -1 - val buf=new Array[Byte](4*1024) - while((mark=tarIs.read(buf)) != -1 && mark != -1){ - fos.write(buf,0,mark) - } - fos.close() - fos=null - } - }else{ - fileList = entryFileName :: fileList - fos=new FileOutputStream(entryFile) - var mark = -1 - val buf=new Array[Byte](4*1024) - while((mark=tarIs.read(buf)) != -1 && mark != -1){ - fos.write(buf,0,mark) - } - fos.close() - fos=null - } - - } - if(entryNum==0){ - println("there is no file!") - } - }*/else{ + fdos.close() + gzip.close() + fdis.close() + }else{ throw new RuntimeException("File type fill in error, or do not support this type.") } - var seq:Seq[String]=Seq(unzipPath) - val row: Row = Row.fromSeq(seq) - val list:List[Row]=List(row) - val rdd: RDD[Row] = session.sparkContext.makeRDD(list) + savePath + + } + + def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { + + session = pec.get[SparkSession]() + + var savePath: String = "" + var arr:ArrayBuffer[Row]=ArrayBuffer() + + + if(isCustomize.equals("true")){ + println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") + + savePath = unzipFile(filePath,fileType,unzipPath) + + + println("savepath : "+savePath) + + arr += Row.fromSeq(Array(savePath)) + + }else if (isCustomize.equals("false")){ + + val inDf: DataFrame = in.read() + inDf.collect().foreach(row => 
{ + + filePath = row.get(0).asInstanceOf[String] + savePath = unzipFile(filePath,"","") + arr += Row.fromSeq(Array(savePath)) + savePath = "" + + }) + + } + + val rdd: RDD[Row] = session.sparkContext.makeRDD(arr.toList) val fields: Array[StructField] =Array(StructField("unzipPath",StringType,nullable = true)) val schema: StructType = StructType(fields) val df: DataFrame = session.createDataFrame(rdd,schema) + println("##################################################################################################") +// println(df.count()) + df.show(20) + println("##################################################################################################") + out.write(df) } @@ -193,6 +137,7 @@ class UnzipFilesOnHDFS extends ConfigurableStop { } def setProperties(map : Map[String, Any]) = { + isCustomize=MapUtil.get(map,key="isCustomize").asInstanceOf[String] filePath=MapUtil.get(map,key="filePath").asInstanceOf[String] fileType=MapUtil.get(map,key="fileType").asInstanceOf[String] unzipPath=MapUtil.get(map,key="unzipPath").asInstanceOf[String] @@ -201,9 +146,15 @@ class UnzipFilesOnHDFS extends ConfigurableStop { override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() - val filePath = new PropertyDescriptor().name("filePath").displayName("filePath").description("file path,such as hdfs://10.0.86.89:9000/a/a.gz").defaultValue("").required(true) - val fileType = new PropertyDescriptor().name("fileType").displayName("fileType").description("file type,such as gz").defaultValue("").required(true) + val filePath = new PropertyDescriptor().name("filePath").displayName("filePath").description("file path,such as hdfs://10.0.86.89:9000/a/a.gz").defaultValue("").required(false) + val fileType = new PropertyDescriptor().name("fileType").displayName("fileType").description("file type,such as gz").defaultValue("").required(false) val unzipPath = new 
class EmblParser extends ConfigurableStop {

  override val authorEmail: String = "yangqidong@cnic.cn"
  override val description: String = "Parsing EMBL type data"
  override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
  override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

  /**
    * Reads a DataFrame of HDFS file paths from the upstream port, parses each
    * EMBL flat file with BioJava, streams all records into one temporary JSON
    * array file on HDFS, then loads that file as a DataFrame and writes it to
    * the output port.
    */
  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val session = pec.get[SparkSession]()
    val inDf: DataFrame = in.read()

    // Derive the HDFS base URL (e.g. "hdfs://host:9000/") from the first path:
    // splitting "hdfs://host:9000/a/b" on '/' yields ("hdfs:", "", "host:9000").
    val firstPath: String = inDf.take(1)(0).get(0).asInstanceOf[String]
    val hdfsUrl: String = firstPath.split("\\/").take(3).mkString("", "/", "/")

    val configuration: Configuration = new Configuration()
    configuration.set("fs.defaultFS", hdfsUrl)
    val fs: FileSystem = FileSystem.get(configuration)

    // FIX: the temp file was named after the Refseq parser (copy-paste bug),
    // so running both stops concurrently would clobber each other's output.
    val hdfsPathTemporary: String = hdfsUrl + "/EMBL_Parser_temporary.json"
    val path: Path = new Path(hdfsPathTemporary)
    if (fs.exists(path)) {
      fs.delete(path, true) // FIX: non-deprecated overload of delete()
    }

    // FIX: write through a single create() stream instead of the original
    // create-close-append dance — append() is not enabled on every HDFS.
    val fdos: FSDataOutputStream = fs.create(path)
    var recordCount: Int = 0

    inDf.collect().foreach(row => {
      val pathStr: String = row.get(0).asInstanceOf[String]
      println("start parser ^^^ " + pathStr)

      val fdis: FSDataInputStream = fs.open(new Path(pathStr))
      val br: BufferedReader = new BufferedReader(new InputStreamReader(fdis))
      try {
        val sequences: RichSequenceIterator = CustomIOTools.IOTools.readEMBLDNA(br, null)
        while (sequences.hasNext) {
          val seq: RichSequence = sequences.nextRichSequence()
          val doc: JSONObject = new JSONObject
          Process.processEMBL_EnsemblSeq(seq, doc)
          // Emit a JSON array: '[' before the first record, ',' separators after.
          fdos.write((if (recordCount == 0) "[" else ",").getBytes("utf-8"))
          fdos.write(doc.toString.getBytes("utf-8"))
          recordCount += 1
        }
      } finally {
        br.close() // FIX: reader/stream were never closed in the original
      }
    })

    // FIX: with zero parsed records the original wrote "]" without "[",
    // producing invalid JSON; emit "[]" instead.
    if (recordCount == 0) fdos.write("[".getBytes("utf-8"))
    fdos.write("]".getBytes("utf-8"))
    fdos.close()

    println("start parser HDFSjsonFile")
    val df: DataFrame = session.read.json(hdfsPathTemporary)
    df.show(20)
    out.write(df)
  }

  override def setProperties(map: Map[String, Any]): Unit = {}

  // This stop has no configurable properties.
  override def getPropertyDescriptor(): List[PropertyDescriptor] = List()

  override def getIcon(): Array[Byte] = ImageUtil.getImage("/microorganism/EMBL_Logo.svg")

  override def getGroup(): List[String] = List(StopGroupEnum.MicroorganismGroup.toString)

  override def initialize(ctx: ProcessContext): Unit = {}

}
class RefseqParser extends ConfigurableStop {

  override val authorEmail: String = "yangqidong@cnic.cn"
  override val description: String = "Parsing Refseq_genome type data"
  override val inportList: List[String] = List(PortEnum.DefaultPort.toString)
  override val outportList: List[String] = List(PortEnum.DefaultPort.toString)

  /**
    * Reads a DataFrame of HDFS file paths from the upstream port, parses each
    * Refseq/GenBank flat file with BioJava, streams all records into one
    * temporary JSON array file on HDFS, then loads that file as a DataFrame
    * and writes it to the output port.
    */
  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val session = pec.get[SparkSession]()
    val inDf: DataFrame = in.read()

    // Derive the HDFS base URL (e.g. "hdfs://host:9000/") from the first path:
    // splitting "hdfs://host:9000/a/b" on '/' yields ("hdfs:", "", "host:9000").
    val firstPath: String = inDf.take(1)(0).get(0).asInstanceOf[String]
    val hdfsUrl: String = firstPath.split("\\/").take(3).mkString("", "/", "/")

    val configuration: Configuration = new Configuration()
    configuration.set("fs.defaultFS", hdfsUrl)
    val fs: FileSystem = FileSystem.get(configuration)

    val hdfsPathTemporary: String = hdfsUrl + "/Refseq_genomeParser_temporary.json"
    val path: Path = new Path(hdfsPathTemporary)
    if (fs.exists(path)) {
      fs.delete(path, true) // FIX: non-deprecated overload of delete()
    }

    // FIX: write through a single create() stream instead of the original
    // create-close-append dance — append() is not enabled on every HDFS.
    val fdos: FSDataOutputStream = fs.create(path)
    var recordCount: Int = 0

    inDf.collect().foreach(row => {
      val pathStr: String = row.get(0).asInstanceOf[String]
      println("start parser ^^^ " + pathStr)

      val fdis: FSDataInputStream = fs.open(new Path(pathStr))
      val br: BufferedReader = new BufferedReader(new InputStreamReader(fdis))
      try {
        val sequences: RichSequenceIterator = CustomIOTools.IOTools.readGenbankProtein(br, null)
        while (sequences.hasNext) {
          val seq: RichSequence = sequences.nextRichSequence()
          val doc: JSONObject = new JSONObject
          Process.processSingleSequence(seq, doc)
          // Emit a JSON array: '[' before the first record, ',' separators after.
          fdos.write((if (recordCount == 0) "[" else ",").getBytes("utf-8"))
          fdos.write(doc.toString.getBytes("utf-8"))
          recordCount += 1
        }
      } finally {
        br.close() // FIX: reader/stream were never closed in the original
      }
    })

    // FIX: with zero parsed records the original wrote "]" without "[",
    // producing invalid JSON; emit "[]" instead.
    if (recordCount == 0) fdos.write("[".getBytes("utf-8"))
    fdos.write("]".getBytes("utf-8"))
    fdos.close()

    println("start parser HDFSjsonFile")
    val df: DataFrame = session.read.json(hdfsPathTemporary)
    df.show(20)
    out.write(df)
  }

  override def setProperties(map: Map[String, Any]): Unit = {}

  // This stop has no configurable properties.
  override def getPropertyDescriptor(): List[PropertyDescriptor] = List()

  override def getIcon(): Array[Byte] = ImageUtil.getImage("/microorganism/refseq.png")

  override def getGroup(): List[String] = List(StopGroupEnum.MicroorganismGroup.toString)

  override def initialize(ctx: ProcessContext): Unit = {}

}
org.biojava.bio.symbol.SimpleSymbolList; +import org.biojava.bio.symbol.Symbol; +import org.biojava.bio.symbol.SymbolList; +import org.biojava.utils.ChangeVetoException; +import org.biojavax.*; +import org.biojavax.bio.seq.MultiSourceCompoundRichLocation; +import org.biojavax.bio.seq.RichFeature; +import org.biojavax.bio.seq.RichLocation; +import org.biojavax.bio.seq.RichSequence; +import org.biojavax.bio.seq.io.GenbankLocationParser; +import org.biojavax.bio.seq.io.RichSeqIOListener; +import org.biojavax.bio.seq.io.RichSequenceFormat; +import org.biojavax.bio.taxa.NCBITaxon; +import org.biojavax.bio.taxa.SimpleNCBITaxon; +import org.biojavax.ontology.ComparableTerm; +import org.biojavax.utils.StringTools; + +import java.io.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by xiujuan on 2016/1/27. + */ +public class CustomEMBLFormat extends RichSequenceFormat.HeaderlessFormat { + // Register this format with the format auto-guesser. 
+ static { + RichSequence.IOTools.registerFormat(CustomEMBLFormat.class); + } + + /** + * The name of the Pre-87 format + */ + public static final String EMBL_PRE87_FORMAT = "EMBL_PRE87"; + + /** + * The name of the current format + */ + public static final String EMBL_FORMAT = "EMBL"; + + protected static final String LOCUS_TAG = "ID"; + protected static final String ACCESSION_TAG = "AC"; + protected static final String VERSION_TAG = "SV"; + protected static final String DEFINITION_TAG = "DE"; + protected static final String DATE_TAG = "DT"; + protected static final String DATABASE_XREF_TAG = "DR"; + protected static final String SOURCE_TAG = "OS"; + protected static final String ORGANISM_TAG = "OC"; + protected static final String ORGANELLE_TAG = "OG"; + protected static final String REFERENCE_TAG = "RN"; + protected static final String REFERENCE_POSITION_TAG = "RP"; + protected static final String REFERENCE_XREF_TAG = "RX"; + protected static final String AUTHORS_TAG = "RA"; + protected static final String CONSORTIUM_TAG = "RG"; + protected static final String TITLE_TAG = "RT"; + protected static final String LOCATOR_TAG = "RL"; + protected static final String REMARK_TAG = "RC"; + protected static final String KEYWORDS_TAG = "KW"; + protected static final String COMMENT_TAG = "CC"; + protected static final String FEATURE_HEADER_TAG = "FH"; + protected static final String FEATURE_TAG = "FT"; + protected static final String CONTIG_TAG = "CO"; + protected static final String TPA_TAG = "AH"; + protected static final String START_SEQUENCE_TAG = "SQ"; + protected static final String DELIMITER_TAG = "XX"; + protected static final String END_SEQUENCE_TAG = "//"; + + // the date pattern + // date (Rel. N, Created) + // date (Rel. 
N, Last updated, Version M) + protected static final Pattern dp = Pattern.compile("([^\\s]+)\\s*(\\(Rel\\.\\s+(\\d+), ([^\\)\\d]+)(\\d*)\\))?$"); + // locus line + protected static final Pattern lp = Pattern.compile("^(\\S+);\\s+SV\\s+(\\d+);\\s+(linear|circular);\\s+(\\S+\\s?\\S+?);\\s+(\\S+);\\s+(\\S+);\\s+(\\d+)\\s+(BP|AA)\\.$"); + protected static final Pattern lpPre87 = Pattern.compile("^(\\S+)\\s+standard;\\s+(circular)?\\s*(genomic)?\\s*(\\S+);\\s+(\\S+);\\s+\\d+\\s+BP\\.$"); + // version line + protected static final Pattern vp = Pattern.compile("^(\\S+?)\\.(\\d+)$"); + // reference position line + protected static final Pattern rpp = Pattern.compile("^(\\d+)(-(\\d+))?,?(\\s?\\d+-\\d+,?)*$"); + // dbxref line + protected static final Pattern dbxp = Pattern.compile("^([^:]+):(\\S+)$"); + + protected static final Pattern readableFileNames = Pattern.compile(".*\\u002e(em|dat).*"); + protected static final Pattern headerLine = Pattern.compile("^ID.*"); + + private NCBITaxon tax = null; + private String organism = null; + private String accession = null; + + /** + * Implements some EMBL-specific terms. 
    /**
     * EMBL-specific ontology terms, each looked up (or lazily created) in the
     * default ontology via {@link RichObjectFactory}.
     */
    public static class Terms extends RichSequence.Terms {

        /**
         * Getter for the RelUpdatedRecordVersion term (record version from the
         * "Last updated, Version M" DT line).
         * @return The RelUpdatedRecordVersion Term
         */
        public static ComparableTerm getRelUpdatedRecordVersionTerm() {
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("RelUpdatedRecordVersion");
        }

        /**
         * Getter for the EMBL term (used as the source term for features).
         * @return The EMBL Term
         */
        public static ComparableTerm getEMBLTerm() {
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("EMBL");
        }

        /**
         * Getter for the Ensembl-specific 'genomic' term.
         * @return The genomic Term
         */
        public static ComparableTerm getGenomicTerm() {
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("genomic");
        }

        /**
         * Getter for the Ensembl-specific 'versionLine' term (stores an SV line
         * that does not match the accession.version pattern).
         * @return The version line Term
         */
        public static ComparableTerm getVersionLineTerm() {
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("versionLine");
        }

        /**
         * Getter for the Ensembl-specific 'dataClass' term (fifth token of the
         * ID line).
         * @return The data class Term
         */
        public static ComparableTerm getDataClassTerm() {
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("dataClass");
        }

        /**
         * Getter for the Ensembl-specific 'organism' term - "ORGANISM_TAG".
         * added by xiujuan 2016-1-28
         * @return The organism Term
         */
        public static ComparableTerm getOrganismTerm(){
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("organism");
        }

        /**
         * Getter for the 'length' term.
         * added by xiujuan 2016-1-28
         * @return The length
         */
        public static ComparableTerm getLengthTerm(){
            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("length");
        }
    }
+ */ + public boolean canRead(File file) throws IOException { + if (readableFileNames.matcher(file.getName()).matches()) return true; + BufferedReader br = new BufferedReader(new FileReader(file)); + String firstLine = br.readLine(); + boolean readable = firstLine!=null && headerLine.matcher(firstLine).matches() && + (lp.matcher(firstLine.substring(3).trim()).matches() || + lpPre87.matcher(firstLine.substring(3).trim()).matches() + ); + br.close(); + return readable; + } + + /** + * {@inheritDoc} + * Always returns a DNA tokenizer. + */ + public SymbolTokenization guessSymbolTokenization(File file) throws IOException { + return RichSequence.IOTools.getDNAParser(); + } + + /** + * {@inheritDoc} + * A stream is in EMBL format if its first line matches the EMBL format for the ID line. + */ + public boolean canRead(BufferedInputStream stream) throws IOException { + stream.mark(2000); // some streams may not support this + BufferedReader br = new BufferedReader(new InputStreamReader(stream)); + String firstLine = br.readLine(); + boolean readable = firstLine!=null && headerLine.matcher(firstLine).matches() && + (lp.matcher(firstLine.substring(3).trim()).matches() || + lpPre87.matcher(firstLine.substring(3).trim()).matches() + ); + // don't close the reader as it'll close the stream too. + // br.close(); + stream.reset(); + return readable; + } + + /** + * {@inheritDoc} + * Always returns a DNA tokenizer. 
+ */ + public SymbolTokenization guessSymbolTokenization(BufferedInputStream stream) throws IOException { + return RichSequence.IOTools.getDNAParser(); + } + + /** + * {@inheritDoc} + */ + public boolean readSequence(BufferedReader reader, + SymbolTokenization symParser, + SeqIOListener listener) + throws IllegalSymbolException, IOException, ParseException { + if (!(listener instanceof RichSeqIOListener)) throw new IllegalArgumentException("Only accepting RichSeqIOListeners today"); + return this.readRichSequence(reader,symParser,(RichSeqIOListener)listener,null); + } + + /** + * {@inheritDoc} + */ + public boolean readRichSequence(BufferedReader reader, + SymbolTokenization symParser, + RichSeqIOListener rlistener, + Namespace ns) + throws IllegalSymbolException, IOException, ParseException { + tax = null; + organism = null; + accession = null; + boolean hasAnotherSequence = true; + //boolean hasInternalWhitespace = false; + + rlistener.startSequence(); + + if (ns==null) ns=RichObjectFactory.getDefaultNamespace(); + rlistener.setNamespace(ns); + + // Get an ordered list of key->value pairs in array-tuples + String sectionKey = null; + do { + List section = this.readSection(reader); + sectionKey = ((String[])section.get(0))[0]; + if(sectionKey == null){ + + String message = ParseException.newMessage(this.getClass(), accession, "No section key", "Not set", sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + // process section-by-section + if (sectionKey.equals(LOCUS_TAG)) { + // entryname dataclass; [circular] molecule; division; sequencelength BP. 
+ String loc = ((String[])section.get(0))[1]; + Matcher m = lp.matcher(loc); + Matcher mPre87 = lpPre87.matcher(loc); + if (m.matches()) { + // first token is both name and primary accession + rlistener.setName(m.group(1)); + rlistener.setAccession(m.group(1)); + // second token is version + rlistener.setVersion(Integer.parseInt(m.group(2))); + // third token is circular/linear + rlistener.setCircular(m.group(3).equals("circular")); + // fourth token is moltype + rlistener.addSequenceProperty(Terms.getMolTypeTerm(),m.group(4)); + // fifth token is data class + rlistener.addSequenceProperty(Terms.getDataClassTerm(),m.group(5)); + // sixth token is taxonomic division + rlistener.setDivision(m.group(6)); + // seventh token is sequence length, which is ignored + // as it is calculated from the sequence data later. + } else if (mPre87.matches()) { + rlistener.setName(mPre87.group(1)); + if (mPre87.group(3)!=null) { + // add annotation for 'genomic' (Ensembl-specific term) + rlistener.addSequenceProperty(Terms.getGenomicTerm(),null); + } + rlistener.addSequenceProperty(Terms.getMolTypeTerm(),mPre87.group(4)); + rlistener.setDivision(mPre87.group(5)); + // Optional extras + String circular = mPre87.group(2); + if (circular!=null) rlistener.setCircular(true); + } else { + String message = ParseException.newMessage(this.getClass(),accession,"Not Set","Bad ID line found", sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + } else if (sectionKey.equals(DEFINITION_TAG)) { + rlistener.setDescription(((String[])section.get(0))[1]); + } else if (sectionKey.equals(SOURCE_TAG)) { + // only interested in organelle sub-tag + for (int i = 1; i < section.size(); i++) { + sectionKey = ((String[])section.get(i))[0]; + if (sectionKey.equals(ORGANELLE_TAG)) { + rlistener.addSequenceProperty(Terms.getOrganelleTerm(), ((String[])section.get(i))[1].trim()); + break; // skip out of for loop once found + } + 
if(sectionKey.equals(ORGANISM_TAG)){ + rlistener.addSequenceProperty(Terms.getOrganismTerm(), ((String[])section.get(i))[1].trim()); + break; + } + } + } else if (sectionKey.equals(DATE_TAG)) { + String chunk = ((String[])section.get(0))[1].trim(); + Matcher dm = dp.matcher(chunk); + if (dm.matches()) { + String date = dm.group(1); + String rel = dm.group(3); + String type = dm.group(4); + if (type.equals("Created")) { + rlistener.addSequenceProperty(Terms.getDateCreatedTerm(), date); + rlistener.addSequenceProperty(Terms.getRelCreatedTerm(), rel); + } else if (type.equals("Last updated, Version ")) { + rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(), date); + rlistener.addSequenceProperty(Terms.getRelUpdatedTerm(), rel); + rlistener.addSequenceProperty(Terms.getRelUpdatedRecordVersionTerm(), dm.group(5)); + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad date type found",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad date line found",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + + } + } else if (sectionKey.equals(ACCESSION_TAG)) { + // if multiple accessions, store only first as accession, + // and store rest in annotation + String[] accs = ((String[])section.get(0))[1].split(";"); + accession = accs[0].trim(); + rlistener.setAccession(accession); + for (int i = 1; i < accs.length; i++) { + rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(),accs[i].trim()); + } + } else if (sectionKey.equals(VERSION_TAG)) { + String ver = ((String[])section.get(0))[1]; + Matcher m = vp.matcher(ver); + if (m.matches()) { + String verAcc = m.group(1); + if (!accession.equals(verAcc)) { + // the version refers to a different accession! 
+ // believe the version line, and store the original + // accession away in the additional accession set + rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(),accession); + accession = verAcc; + rlistener.setAccession(accession); + } + rlistener.setVersion(Integer.parseInt(m.group(2))); + } else { + rlistener.addSequenceProperty(Terms.getVersionLineTerm(),ver); + } + } else if (sectionKey.equals(KEYWORDS_TAG)) { + String val = ((String[])section.get(0))[1]; + val = val.substring(0,val.length()-1); // chomp dot + val = val.replace('\n',' '); //remove newline + String[] kws = val.split(";"); + for (int i = 0; i < kws.length; i++) { + String kw = kws[i].trim(); + if (kw.length()==0) continue; + rlistener.addSequenceProperty(Terms.getKeywordTerm(), kw); + } + } else if (sectionKey.equals(DATABASE_XREF_TAG)) { + String val = ((String[])section.get(0))[1]; + val = val.substring(0,val.length()-1); // chomp dot + // database_identifier; primary_identifier; secondary_identifier.... 
+ String[] parts = val.split(";"); + // construct a DBXREF out of the dbname part[0] and accession part[1] + CrossRef crossRef = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{parts[0].trim(),parts[1].trim(), new Integer(0)}); + // assign remaining bits of info as annotations + for (int j = 2; j < parts.length; j++) { + Note note = new SimpleNote(Terms.getAdditionalAccessionTerm(),parts[j].trim(),j-1); + try { + crossRef.getRichAnnotation().addNote(note); + } catch (ChangeVetoException ce) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Could not annotate identifier terms",sectionToString(section)); + ParseException pe = new ParseException(message); + System.err.println("error happens: " + message); + pe.initCause(ce); + throw pe; + } + } + RankedCrossRef rcrossRef = new SimpleRankedCrossRef(crossRef, 0); + rlistener.setRankedCrossRef(rcrossRef); + } else if (sectionKey.equals(REFERENCE_TAG) && !this.getElideReferences()) { + // first line of section has rank and location + String refrank = ((String[])section.get(0))[1]; + int ref_rank = Integer.parseInt(refrank.substring(1,refrank.length()-1)); + int ref_start = -999; + int ref_end = -999; + // rest can be in any order + String consortium = null; + String authors = ""; + String title = null; + String locator = null; + String pubmed = null; + String medline = null; + String doi = null; + String remark = null; + for (int i = 1; i < section.size(); i++) { + String key = ((String[])section.get(i))[0]; + String val = ((String[])section.get(i))[1]; + if (key.equals(AUTHORS_TAG)) { + if (val.endsWith(";")) val = val.substring(0,val.length()-1); // chomp semicolon + authors = val.replace('\n',' '); //see #2276 + }else if (key.equals(CONSORTIUM_TAG)) { + if (val.endsWith(";")) val = val.substring(0,val.length()-1); // chomp semicolon + consortium = val.replace('\n',' '); //see #2276 + }else if (key.equals(TITLE_TAG)) { + if (val.length()>1) { + if 
(val.endsWith(";")) val = val.substring(0,val.length()-1); // chomp semicolon + if (val.endsWith("\"")) val = val.substring(1,val.length()-1); // chomp quotes + title = val.replace('\n',' '); //see #2276 + } else title=null; // single semi-colon indicates no title + }else if (key.equals(LOCATOR_TAG)) { + if (val.endsWith(".")) val = val.substring(0,val.length()-1); // chomp dot + locator = val.replace('\n',' '); //see #2276 + }else if (key.equals(REFERENCE_XREF_TAG)) { + // database_identifier; primary_identifier. + String[] refs = val.split("\\.(\\s+|$)"); + for (int j = 0 ; j < refs.length; j++) { + if (refs[j].trim().length()==0) continue; + String[] parts = refs[j].split(";"); + if(parts.length == 2){ + String db = parts[0]; + String ref = parts[1].trim(); + if (db.equalsIgnoreCase(Terms.PUBMED_KEY)) pubmed = ref; + else if (db.equalsIgnoreCase(Terms.MEDLINE_KEY)) medline = ref; + else if (db.equalsIgnoreCase(Terms.DOI_KEY)) doi = ref; + } + } + }else if (key.equals(REMARK_TAG)) remark = val.replace('\n',' '); //see #2276 + else if (key.equals(REFERENCE_POSITION_TAG)) { + // only the first group is taken + // if we have multiple lines, only the last line is taken + Matcher m = rpp.matcher(val); + if (m.matches()) { + ref_start = Integer.parseInt(m.group(1)); + if(m.group(2) != null) + ref_end = Integer.parseInt(m.group(3)); + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad reference line found",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + } + } + // create the docref object + try { + List authSet = DocRefAuthor.Tools.parseAuthorString(authors); + if (consortium!=null) authSet.add(new SimpleDocRefAuthor(consortium, true, false)); + DocRef dr = (DocRef)RichObjectFactory.getObject(SimpleDocRef.class,new Object[]{authSet,locator,title}); + // assign either the pubmed or medline to the docref - medline gets priority, then pubmed, then doi + if 
(medline!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.MEDLINE_KEY, medline, new Integer(0)})); + else if (pubmed!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.PUBMED_KEY, pubmed, new Integer(0)})); + else if (doi!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.DOI_KEY, doi, new Integer(0)})); + // assign the remarks + if (!this.getElideComments()) dr.setRemark(remark); + // assign the docref to the bioentry + RankedDocRef rdr = new SimpleRankedDocRef(dr, + (ref_start != -999 ? new Integer(ref_start) : null), + (ref_end != -999 ? new Integer(ref_end) : null), + ref_rank); + rlistener.setRankedDocRef(rdr); + rlistener.setRankedDocRef(rdr); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(e, message); + } + } else if (sectionKey.equals(COMMENT_TAG) && !this.getElideComments()) { + // Set up some comments + rlistener.setComment(((String[])section.get(0))[1]); + } else if (sectionKey.equals(FEATURE_TAG) && !this.getElideFeatures()) { + // starting from second line of input, start a new feature whenever we come across + // a key that does not start with / + boolean seenAFeature = false; + int rcrossrefCount = 0; + boolean skippingBond = false; + for (int i = 1 ; i < section.size(); i++) { + String key = ((String[])section.get(i))[0]; + String val = ((String[])section.get(i))[1]; + if (key.startsWith("/")) { + if(!skippingBond){ + key = key.substring(1); // strip leading slash + val = val.replaceAll("\\s*[\\n\\r]+\\s*"," ").trim(); + if (val.startsWith("\"")) val = val.substring(1,val.length()-1); // strip quotes + // parameter on old feature + if (key.equalsIgnoreCase("db_xref")) { + Matcher m = dbxp.matcher(val); + if (m.matches()) { + 
String dbname = m.group(1); + String raccession = m.group(2); + if (dbname.equalsIgnoreCase("taxon")) { + // Set the Taxon instead of a dbxref + tax = (NCBITaxon)RichObjectFactory.getObject(SimpleNCBITaxon.class, new Object[]{Integer.valueOf(raccession)}); + rlistener.setTaxon(tax); + try { + if (organism!=null) tax.addName(NCBITaxon.SCIENTIFIC,organism); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(e, message); + } + } else { + try { + CrossRef cr = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{dbname, raccession, new Integer(0)}); + RankedCrossRef rcr = new SimpleRankedCrossRef(cr, ++rcrossrefCount); + rlistener.getCurrentFeature().addRankedCrossRef(rcr); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(e, message); + } + } + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad dbxref found",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + } else if (key.equalsIgnoreCase("organism")) { + try { + organism = val; + if (tax!=null) tax.addName(NCBITaxon.SCIENTIFIC,organism); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + } else { + if (key.equalsIgnoreCase("translation")) { + // strip spaces from sequence + val = val.replaceAll("\\s+",""); + } + rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(key),val); + } + } + } else { + // new feature! 
+ // end previous feature + if(key.equalsIgnoreCase("bond")) + { + skippingBond = true; + }else{ + skippingBond = false; + if (seenAFeature) { + rlistener.endFeature(); + } + // start next one, with lots of lovely info in it + RichFeature.Template templ = new RichFeature.Template(); + templ.annotation = new SimpleRichAnnotation(); + templ.sourceTerm = Terms.getEMBLTerm(); + templ.typeTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(key); + templ.featureRelationshipSet = new TreeSet(); + templ.rankedCrossRefs = new TreeSet(); + String tidyLocStr = val.replaceAll("\\s+",""); + templ.location = GenbankLocationParser.parseLocation(ns, accession, tidyLocStr); + if(!(templ.location instanceof MultiSourceCompoundRichLocation)){ + rlistener.startFeature(templ); + seenAFeature = true; + rcrossrefCount = 0; + }else{ + System.err.println("encounter a MultiSourceCompoundRichLocation instance"); + skippingBond = true; + seenAFeature = false; + } + } + } + } + if (seenAFeature) rlistener.endFeature(); + } else if (sectionKey.equals(START_SEQUENCE_TAG) && !this.getElideSymbols()) { + StringBuffer seq = new StringBuffer(); + for (int i = 0 ; i < section.size(); i++) seq.append(((String[])section.get(i))[1]); + try { + SymbolList sl = new SimpleSymbolList(symParser, + seq.toString().replaceAll("\\s+","").replaceAll("[\\.|~]","-")); + rlistener.addSymbols(symParser.getAlphabet(), + (Symbol[])(sl.toList().toArray(new Symbol[0])), + 0, sl.length()); + } catch (Exception e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad sequence",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(e, message); + } + } + } while (!sectionKey.equals(END_SEQUENCE_TAG)); + + // Allows us to tolerate trailing whitespace without + // thinking that there is another Sequence to follow + while (true) { + reader.mark(1); + int c = reader.read(); + if (c == -1) { + hasAnotherSequence = false; + break; + } + 
if (Character.isWhitespace((char) c)) { + //hasInternalWhitespace = true; + continue; + } + //if (hasInternalWhitespace) + // System.err.println("Warning: whitespace found between sequence entries"); + reader.reset(); + break; + } + + // Finish up. + rlistener.endSequence(); + return hasAnotherSequence; + } + + // reads an indented section, combining split lines and creating a list of key->value tuples + private List readSection(BufferedReader br) throws ParseException { + List section = new ArrayList(); + String line; + boolean done = false; + + // while not done + try { + while (!done) { + // mark buffer + br.mark(320); + // read token + line = br.readLine(); + if (line.length()<2) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad line found",line); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + String token = line.substring(0,2); + // READ SEQUENCE SECTION + if (token.equals(START_SEQUENCE_TAG)) { + // from next line, read sequence until // - leave // on stack + StringBuffer sb = new StringBuffer(); + while (!done) { + br.mark(160); + line = br.readLine(); + if (line.startsWith(END_SEQUENCE_TAG)) { + br.reset(); + done = true; + } else { + // create sequence tag->value pair to return, sans numbers + sb.append(line.replaceAll("\\d","")); + } + } + section.add(new String[]{START_SEQUENCE_TAG,sb.toString()}); + } + // READ FEATURE TABLE SECTION + else if (token.equals(FEATURE_HEADER_TAG)) { + // create dummy feature tag->value pair and add to return set + section.add(new String[]{FEATURE_TAG,null}); + // drop next FH line + line = br.readLine(); // skip next line too - it is also FH + // read all FT lines until XX + String currentTag = null; + StringBuffer currentVal = null; + while (!done) { + line = br.readLine(); + if (line.startsWith(DELIMITER_TAG)) { + done = true; + // dump current tag if exists + if (currentTag!=null) section.add(new 
String[]{currentTag,currentVal.toString()}); + } else { + // FT lines: FT word value + // or FT /word + // or FT /db_xref="taxon:3899.... + // ......" + line = line.substring(5); // chomp off "FT " + if (!line.startsWith(" ")) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // case 1 : word value - splits into key-value on its own + String[] parts = line.trim().split("\\s+"); + currentTag = parts[0]; + currentVal = new StringBuffer(); + currentVal.append(parts[1]); + } else { + line = line.trim(); + if (line.startsWith("/")) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // case 2 : /word[=.....] + currentVal = new StringBuffer(); + int equalIndex = line.indexOf('='); + if (equalIndex>=0) { + currentTag = line.substring(0, equalIndex); + currentVal.append(line.substring(equalIndex+1)); + } else { + currentTag = line; + } + } else { + // case 3 : ...." 
+ currentVal.append("\n"); + currentVal.append(line); + } + } + } + } + } + // READ END OF SEQUENCE + else if (token.equals(END_SEQUENCE_TAG)) { + section.add(new String[]{END_SEQUENCE_TAG,null}); + done = true; + } + // READ DELIMITER TAG + else if (token.equals(DELIMITER_TAG)) { + section.add(new String[]{DELIMITER_TAG,null}); + done = true; + } + // READ THIRD PARTY ANNOTATION SECTION + else if (token.equals(TPA_TAG)) { + // exception = don't know how to do TPA yet + // TODO: 2016/6/27 run into here with accession BK000583, HE580237 + /*String message = ParseException.newMessage(this.getClass(),accession,"not set", "Unable to handle TPAs just yet",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message);*/ + section.add(new String[]{TPA_TAG, null}); + done = true; + } + // READ CONTIG SECTION + //else if (token.equals(CONTIG_TAG)) { + // exception = don't know how to do contigs yet + //String message = ParseException.newMessage(this.getClass(),accession,"not set", "Unable to handle contig assemblies just yet",sectionToString(section)); + //throw new ParseException(message); + //} + //2016.1.27 modified by Xiujuan for parsing file, file containing CONTIG_TAG + else if (token.equals(CONTIG_TAG)) { + section.add(new String[]{CONTIG_TAG,null}); + done = true; + } + // READ DOCREF + else if (token.equals(DATABASE_XREF_TAG)) { + section.add(new String[]{DATABASE_XREF_TAG,line.substring(5).trim()}); + done = true; + } + // READ DATE + else if (token.equals(DATE_TAG)) { + section.add(new String[]{DATE_TAG,line.substring(5).trim()}); + done = true; + } + // READ NORMAL TAG/VALUE SECTION + else { + // rewind buffer to mark + br.reset(); + // read token/values until XX + String currentTag = null; + StringBuffer currentVal = null; + while (!done) { + line = br.readLine(); + if (line.startsWith(DELIMITER_TAG)) { + done = true; + // dump current tag if exists + if (currentTag!=null) section.add(new 
String[]{currentTag,currentVal.toString()}); + } else { + try { + // merge neighbouring repeated tokens by concatting values + // return tag->value pairs + String tag = line.substring(0,2); + String value = line.substring(5); + if (currentTag==null || !tag.equals(currentTag)) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // start new tag + currentTag = tag; + currentVal = new StringBuffer(); + currentVal.append(value); + } else { + currentVal.append("\n"); + currentVal.append(value); + } + } catch (Exception e) { + String message = ParseException.newMessage(this.getClass(), accession, "not set","",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(e, message); + } + } + } + } + } + } catch (IOException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Unable to handle TPAs just yet",sectionToString(section)); + System.err.println("error happens: " + message); + throw new ParseException(message); + } + return section; + } + + /** + * {@inheritDoc} + */ + public void writeSequence(Sequence seq, PrintStream os) throws IOException { + if (this.getPrintStream()==null) this.setPrintStream(os); + this.writeSequence(seq, RichObjectFactory.getDefaultNamespace()); + } + + /** + * {@inheritDoc} + */ + public void writeSequence(Sequence seq, String format, PrintStream os) throws IOException { + if (this.getPrintStream()==null) this.setPrintStream(os); + this.writeSequence(seq, format, RichObjectFactory.getDefaultNamespace()); + } + + /** + * {@inheritDoc} + * Namespace is ignored as EMBL has no concept of it. + */ + public void writeSequence(Sequence seq, Namespace ns) throws IOException { + this.writeSequence(seq, this.getDefaultFormat(), ns); + } + + /** + * As per {@link #writeSequence(Sequence, Namespace)}, except + * that it also takes a format parameter. 
This can be any of the formats + * defined as constants in this class. + * @param seq see {@link #writeSequence(Sequence, Namespace)} + * @param format the format to use. + * @param ns see {@link #writeSequence(Sequence, Namespace)} + * @throws IOException see {@link #writeSequence(Sequence, Namespace)} + */ + public void writeSequence(Sequence seq, String format, Namespace ns) throws IOException { + if (!format.equals(EMBL_FORMAT) && !format.equals(EMBL_PRE87_FORMAT)) + throw new IllegalArgumentException("Format "+format+" not recognised."); + + RichSequence rs; + try { + if (seq instanceof RichSequence) rs = (RichSequence)seq; + else rs = RichSequence.Tools.enrich(seq); + } catch (ChangeVetoException e) { + IOException e2 = new IOException("Unable to enrich sequence"); + e2.initCause(e); + throw e2; + } + + SymbolTokenization tok; + try { + tok = rs.getAlphabet().getTokenization("token"); + } catch (Exception e) { + throw new RuntimeException("Unable to get alphabet tokenizer",e); + } + + Set notes = rs.getNoteSet(); + String accession = rs.getAccession(); + StringBuffer accessions = new StringBuffer(); + accessions.append(accession); + accessions.append(";"); + String cdat = null; + String udat = null; + String crel = null; + String urel = null; + String urecv = null; + String organelle = null; + String versionLine = null; + String dataClass = "STD"; + boolean genomic = false; + String moltype = rs.getAlphabet().getName(); + for (Iterator i = notes.iterator(); i.hasNext(); ) { + Note n = i.next(); + if (n.getTerm().equals(Terms.getDateCreatedTerm())) cdat=n.getValue(); + else if (n.getTerm().equals(Terms.getDateUpdatedTerm())) udat=n.getValue(); + else if (n.getTerm().equals(Terms.getRelCreatedTerm())) crel=n.getValue(); + else if (n.getTerm().equals(Terms.getRelUpdatedTerm())) urel=n.getValue(); + else if (n.getTerm().equals(Terms.getRelUpdatedRecordVersionTerm())) urecv=n.getValue(); + else if (n.getTerm().equals(Terms.getMolTypeTerm())) moltype=n.getValue(); 
+ else if (n.getTerm().equals(Terms.getVersionLineTerm())) versionLine=n.getValue(); + else if (n.getTerm().equals(Terms.getGenomicTerm())) genomic = true; + else if (n.getTerm().equals(Terms.getDataClassTerm())) dataClass = n.getValue(); + else if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) { + accessions.append(" "); + accessions.append(n.getValue()); + accessions.append(";"); + } else if (n.getTerm().equals(Terms.getOrganelleTerm())) organelle=n.getValue(); + } + + StringBuffer locusLine = new StringBuffer(); + // Division cannot be null + String div = rs.getDivision(); + if(div==null || div.length()==0 || div.length()>3) + div = "UNC"; //Unclassified + + if (format.equals(EMBL_FORMAT)) { + // accession; SV version; circular/linear; moltype; dataclass; division; length BP. + locusLine.append(rs.getAccession()); + locusLine.append("; SV "); + locusLine.append(rs.getVersion()); + locusLine.append("; "); + locusLine.append(rs.getCircular()?"circular":"linear"); + locusLine.append("; "); + locusLine.append(moltype); + locusLine.append("; "); + locusLine.append(dataClass); + locusLine.append("; "); + locusLine.append(div); + locusLine.append("; "); + locusLine.append(rs.length()); + locusLine.append(" BP."); + } else if (format.equals(EMBL_PRE87_FORMAT)) { + // entryname dataclass; [circular] molecule; division; sequencelength BP. 
+ locusLine.append(StringTools.rightPad(rs.getName(), 9)); + locusLine.append(" standard; "); + locusLine.append(rs.getCircular()?"circular ":""); + // if it is Ensembl genomic, add that in too + if (genomic==true) locusLine.append("genomic "); + locusLine.append(moltype); + locusLine.append("; "); + locusLine.append(div); + locusLine.append("; "); + locusLine.append(rs.length()); + locusLine.append(" BP."); + } + StringTools.writeKeyValueLine(LOCUS_TAG, locusLine.toString(), 5, this.getLineWidth(), null, LOCUS_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // accession line + StringTools.writeKeyValueLine(ACCESSION_TAG, accessions.toString(), 5, this.getLineWidth(), null, ACCESSION_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // version line + if (format.equals(EMBL_PRE87_FORMAT)) { + if (versionLine!=null) StringTools.writeKeyValueLine(VERSION_TAG, versionLine, 5, this.getLineWidth(), null, VERSION_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(VERSION_TAG, accession+"."+rs.getVersion(), 5, this.getLineWidth(), null, VERSION_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // date line + StringTools.writeKeyValueLine(DATE_TAG, (cdat==null?udat:cdat)+" (Rel. "+(crel==null?"0":crel)+", Created)", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(DATE_TAG, udat+" (Rel. 
"+(urel==null?"0":urel)+", Last updated, Version "+(urecv==null?"0":urecv)+")", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // definition line + StringTools.writeKeyValueLine(DEFINITION_TAG, rs.getDescription(), 5, this.getLineWidth(), null, DEFINITION_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // keywords line + StringBuffer keywords = new StringBuffer(); + for (Iterator n = notes.iterator(); n.hasNext(); ) { + Note nt = n.next(); + if (nt.getTerm().equals(Terms.getKeywordTerm())) { + if (keywords.length()>0) keywords.append("; "); + keywords.append(nt.getValue()); + } + } + if (keywords.length()>0) { + keywords.append("."); + StringTools.writeKeyValueLine(KEYWORDS_TAG, keywords.toString(), 5, this.getLineWidth(), null, KEYWORDS_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } else { + this.getPrintStream().println(KEYWORDS_TAG+" ."); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // source line (from taxon) + // organism line + NCBITaxon tax = rs.getTaxon(); + if (tax!=null) { + StringTools.writeKeyValueLine(SOURCE_TAG, tax.getDisplayName(), 5, this.getLineWidth(), null, SOURCE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(ORGANISM_TAG, tax.getNameHierarchy(), 5, this.getLineWidth(), null, SOURCE_TAG, this.getPrintStream()); + if (organelle!=null) StringTools.writeKeyValueLine(ORGANELLE_TAG, organelle, 5, this.getLineWidth(), null, ORGANELLE_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // references - rank (bases x to y) + for (Iterator r = rs.getRankedDocRefs().iterator(); r.hasNext(); ) { + RankedDocRef rdr = r.next(); + DocRef d = rdr.getDocumentReference(); + // RN, RC, RP, RX, RG, RA, RT, RL + StringTools.writeKeyValueLine(REFERENCE_TAG, "["+rdr.getRank()+"]", 5, this.getLineWidth(), null, REFERENCE_TAG, this.getPrintStream()); + 
StringTools.writeKeyValueLine(REMARK_TAG, d.getRemark(), 5, this.getLineWidth(), null, REMARK_TAG, this.getPrintStream()); + Integer rstart = rdr.getStart(); + if (rstart==null) rstart = new Integer(1); + Integer rend = rdr.getEnd(); + if (rend==null) rend = new Integer(rs.length()); + StringTools.writeKeyValueLine(REFERENCE_POSITION_TAG, rstart+"-"+rend, 5, this.getLineWidth(), null, REFERENCE_POSITION_TAG, this.getPrintStream()); + CrossRef c = d.getCrossref(); + if (c!=null) StringTools.writeKeyValueLine(REFERENCE_XREF_TAG, c.getDbname()+"; "+c.getAccession()+".", 5, this.getLineWidth(), null, REFERENCE_XREF_TAG, this.getPrintStream()); + List auths = d.getAuthorList(); + for (Iterator j = auths.iterator(); j.hasNext(); ) { + DocRefAuthor a = j.next(); + if (a.isConsortium()) { + StringTools.writeKeyValueLine(CONSORTIUM_TAG, a+";", 5, this.getLineWidth(), null, CONSORTIUM_TAG, this.getPrintStream()); + j.remove(); + } + } + if (!auths.isEmpty()) StringTools.writeKeyValueLine(AUTHORS_TAG, DocRefAuthor.Tools.generateAuthorString(auths, true)+";", 5, this.getLineWidth(), null, AUTHORS_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(AUTHORS_TAG, ";", 5, this.getLineWidth(), null, AUTHORS_TAG, this.getPrintStream()); + if (d.getTitle()!=null && d.getTitle().length()!=0) StringTools.writeKeyValueLine(TITLE_TAG, "\""+d.getTitle()+"\";", 5, this.getLineWidth(), null, TITLE_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(TITLE_TAG, ";", 5, this.getLineWidth(), null, TITLE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(LOCATOR_TAG, d.getLocation()+".", 5, this.getLineWidth(), null, LOCATOR_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // db references - ranked + for (Iterator r = rs.getRankedCrossRefs().iterator(); r.hasNext(); ) { + RankedCrossRef rcr = r.next(); + CrossRef c = rcr.getCrossRef(); + Set noteset = c.getNoteSet(); + StringBuffer sb = new StringBuffer(); + 
sb.append(c.getDbname()); + sb.append("; "); + sb.append(c.getAccession()); + boolean hasSecondary = false; + for (Iterator i = noteset.iterator(); i.hasNext(); ) { + Note n = i.next(); + if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) { + sb.append("; "); + sb.append(n.getValue()); + hasSecondary = true; + } + } + //if (!hasSecondary) sb.append("; -"); + //sb.append("."); + if (!hasSecondary) sb.append(";"); + else sb.append("."); + StringTools.writeKeyValueLine(DATABASE_XREF_TAG, sb.toString(), 5, this.getLineWidth(), null, DATABASE_XREF_TAG, this.getPrintStream()); + } + if (!rs.getRankedCrossRefs().isEmpty()) + this.getPrintStream().println(DELIMITER_TAG+" "); + + // comments - if any + if (!rs.getComments().isEmpty()) { + StringBuffer sb = new StringBuffer(); + for (Iterator i = rs.getComments().iterator(); i.hasNext(); ) { + Comment c = i.next(); + sb.append(c.getComment()); + if (i.hasNext()) sb.append("\n"); + } + StringTools.writeKeyValueLine(COMMENT_TAG, sb.toString(), 5, this.getLineWidth(), null, COMMENT_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + this.getPrintStream().println(FEATURE_HEADER_TAG+" Key Location/Qualifiers"); + this.getPrintStream().println(FEATURE_HEADER_TAG+" "); + // feature_type location + for (Iterator i = rs.getFeatureSet().iterator(); i.hasNext(); ) { + RichFeature f = (RichFeature)i.next(); + StringTools.writeKeyValueLine(FEATURE_TAG+" "+f.getTypeTerm().getName(), GenbankLocationParser.writeLocation((RichLocation)f.getLocation()), 21, this.getLineWidth(), ",", FEATURE_TAG, this.getPrintStream()); + for (Iterator j = f.getNoteSet().iterator(); j.hasNext(); ) { + Note n = j.next(); + // /key="val" or just /key if val=="" + if (n.getValue()==null || n.getValue().length()==0) StringTools.writeKeyValueLine(FEATURE_TAG, "/"+n.getTerm().getName(), 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(FEATURE_TAG, 
"/"+n.getTerm().getName()+"=\""+n.getValue()+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + } + // add-in to source feature only organism and db_xref="taxon:xyz" where present + if (f.getType().equals("source") && tax!=null) { + String displayName = tax.getDisplayName(); + if (displayName.indexOf('(')>-1) displayName = displayName.substring(0, displayName.indexOf('(')).trim(); + StringTools.writeKeyValueLine(FEATURE_TAG, "/organism=\""+displayName+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(FEATURE_TAG, "/db_xref=\"taxon:"+tax.getNCBITaxID()+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + } + // add-in other dbxrefs where present + for (Iterator j = f.getRankedCrossRefs().iterator(); j.hasNext(); ) { + RankedCrossRef rcr = j.next(); + CrossRef cr = rcr.getCrossRef(); + StringTools.writeKeyValueLine(FEATURE_TAG, "/db_xref=\""+cr.getDbname()+":"+cr.getAccession()+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + } + } + this.getPrintStream().println(DELIMITER_TAG+" "); + + // SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other; + int aCount = 0; + int cCount = 0; + int gCount = 0; + int tCount = 0; + int oCount = 0; + for (int i = 1; i <= rs.length(); i++) { + char c; + try { + c = tok.tokenizeSymbol(rs.symbolAt(i)).charAt(0); + } catch (Exception e) { + throw new RuntimeException("Unable to get symbol at position "+i,e); + } + switch (c) { + case 'a': case 'A': + aCount++; + break; + case 'c': case 'C': + cCount++; + break; + case 'g': case 'G': + gCount++; + break; + case 't': case 'T': + tCount++; + break; + default: + oCount++; + } + } + this.getPrintStream().print(START_SEQUENCE_TAG+" Sequence "+rs.length()+" BP; "); + this.getPrintStream().print(aCount + " A; "); + this.getPrintStream().print(cCount + " C; "); + this.getPrintStream().print(gCount + " G; "); + this.getPrintStream().print(tCount + " T; "); + 
this.getPrintStream().println(oCount + " other;"); + + // sequence stuff + Symbol[] syms = (Symbol[])rs.toList().toArray(new Symbol[0]); + int lineLen = 0; + int symCount = 0; + this.getPrintStream().print(" "); + for (int i = 0; i < syms.length; i++) { + if (symCount % 60 == 0 && symCount>0) { + this.getPrintStream().print(StringTools.leftPad(""+symCount,10)); + this.getPrintStream().print("\n "); + lineLen = 0; + } + if (symCount % 10 == 0) { + this.getPrintStream().print(" "); + lineLen++; + } + try { + this.getPrintStream().print(tok.tokenizeSymbol(syms[i])); + } catch (IllegalSymbolException e) { + throw new RuntimeException("Found illegal symbol: "+syms[i]); + } + symCount++; + lineLen++; + } + this.getPrintStream().print(StringTools.leftPad(""+symCount,(66-lineLen)+10)); + this.getPrintStream().print("\n"); + this.getPrintStream().println(END_SEQUENCE_TAG); + } + + /** + * {@inheritDoc} + */ + public String getDefaultFormat() { + return EMBL_FORMAT; + } + + + /** + * Converts the current parse section to a String. Useful for debugging. 
+ */ + String sectionToString(List section){ + StringBuffer parseBlock = new StringBuffer(); + for(Iterator i = section.listIterator(); i.hasNext();){ + String[] part = (String[])i.next(); + for(int x = 0; x < part.length; x++){ + parseBlock.append(part[x]); + if(x == 0){ + parseBlock.append(" "); //the gap will have been trimmed + } + } + } + return parseBlock.toString(); + } +} diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomEnsemblFormat.java b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomEnsemblFormat.java new file mode 100644 index 0000000..c65eb63 --- /dev/null +++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomEnsemblFormat.java @@ -0,0 +1,1133 @@ +package cn.piflow.bundle.microorganism.util; + +import org.biojava.bio.seq.Sequence; +import org.biojava.bio.seq.io.ParseException; +import org.biojava.bio.seq.io.SeqIOListener; +import org.biojava.bio.seq.io.SymbolTokenization; +import org.biojava.bio.symbol.IllegalSymbolException; +import org.biojava.bio.symbol.Symbol; +import org.biojava.utils.ChangeVetoException; +import org.biojavax.*; +import org.biojavax.bio.seq.RichFeature; +import org.biojavax.bio.seq.RichLocation; +import org.biojavax.bio.seq.RichSequence; +import org.biojavax.bio.seq.io.GenbankLocationParser; +import org.biojavax.bio.seq.io.RichSeqIOListener; +import org.biojavax.bio.seq.io.RichSequenceFormat; +import org.biojavax.bio.taxa.NCBITaxon; +import org.biojavax.bio.taxa.SimpleNCBITaxon; +import org.biojavax.ontology.ComparableTerm; +import org.biojavax.utils.StringTools; + +import java.io.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by xiujuan on 2016/1/27. + */ +public class CustomEnsemblFormat extends RichSequenceFormat.HeaderlessFormat { + // Register this format with the format auto-guesser. 
+ static { + RichSequence.IOTools.registerFormat(CustomEMBLFormat.class); + } + + /** + * The name of the Pre-87 format + */ + public static final String EMBL_PRE87_FORMAT = "EMBL_PRE87"; + + /** + * The name of the current format + */ + public static final String EMBL_FORMAT = "EMBL"; + + protected static final String LOCUS_TAG = "ID"; + protected static final String ACCESSION_TAG = "AC"; + protected static final String VERSION_TAG = "SV"; + protected static final String DEFINITION_TAG = "DE"; + protected static final String DATE_TAG = "DT"; + protected static final String DATABASE_XREF_TAG = "DR"; + protected static final String SOURCE_TAG = "OS"; + protected static final String ORGANISM_TAG = "OC"; + protected static final String ORGANELLE_TAG = "OG"; + protected static final String REFERENCE_TAG = "RN"; + protected static final String REFERENCE_POSITION_TAG = "RP"; + protected static final String REFERENCE_XREF_TAG = "RX"; + protected static final String AUTHORS_TAG = "RA"; + protected static final String CONSORTIUM_TAG = "RG"; + protected static final String TITLE_TAG = "RT"; + protected static final String LOCATOR_TAG = "RL"; + protected static final String REMARK_TAG = "RC"; + protected static final String KEYWORDS_TAG = "KW"; + protected static final String COMMENT_TAG = "CC"; + protected static final String FEATURE_HEADER_TAG = "FH"; + protected static final String FEATURE_TAG = "FT"; + protected static final String CONTIG_TAG = "CO"; + protected static final String TPA_TAG = "AH"; + protected static final String START_SEQUENCE_TAG = "SQ"; + protected static final String DELIMITER_TAG = "XX"; + protected static final String END_SEQUENCE_TAG = "//"; + + // the date pattern Ensembl file + protected static final Pattern dp_ensembl = Pattern.compile("([^\\s]+)"); + // the date pattern + // date (Rel. N, Created) + // date (Rel. 
N, Last updated, Version M) + protected static final Pattern dp = Pattern.compile("([^\\s]+)\\s*(\\(Rel\\.\\s+(\\d+), ([^\\)\\d]+)(\\d*)\\))?$"); + // locus line + protected static final Pattern lp = Pattern.compile("^(\\S+);\\s+SV\\s+(\\d+);\\s+(linear|circular);\\s+(\\S+\\s?\\S+?);\\s+(\\S+);\\s+(\\S+);\\s+(\\d+)\\s+(BP|AA)\\.$"); + protected static final Pattern lpPre87 = Pattern.compile("^(\\S+)\\s+standard;\\s+(circular)?\\s*(genomic)?\\s*(\\S+);\\s+(\\S+);\\s+(\\d+)\\s+BP\\.$"); + //protected static final Pattern ensembl_id = Pattern.compile("^\\S+\\s+\\S+;\\s+\\S+;\\s+\\S+;\\s+(\\d+)\\s+BP\\.$"); + // version line + protected static final Pattern vp = Pattern.compile("^(\\S+?)\\.(\\d+)$"); + // reference position line + protected static final Pattern rpp = Pattern.compile("^(\\d+)(-(\\d+))?,?(\\s\\d+-\\d+,?)*$"); + // dbxref line + protected static final Pattern dbxp = Pattern.compile("^([^:]+):(.+)$"); + + protected static final Pattern readableFileNames = Pattern.compile(".*\\u002e(em|dat).*"); + protected static final Pattern headerLine = Pattern.compile("^ID.*"); + + private NCBITaxon tax = null; + private String organism = null; + private String accession = null; + + /** + * Implements some EMBL-specific terms. 
+ */ + public static class Terms extends RichSequence.Terms { + + /** + * Getter for the RelUpdatedRecordVersion term + * @return The RelUpdatedRecordVersion Term + */ + public static ComparableTerm getRelUpdatedRecordVersionTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("RelUpdatedRecordVersion"); + } + + /** + * Getter for the EMBL term + * @return The EMBL Term + */ + public static ComparableTerm getEMBLTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("EMBL"); + } + + /** + * Getter for the Ensembl-specific 'genomic' term + * @return The genomic Term + */ + public static ComparableTerm getGenomicTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("genomic"); + } + + /** + * Getter for the Ensembl-specific 'versionLine' term + * @return The version line Term + */ + public static ComparableTerm getVersionLineTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("versionLine"); + } + + /** + * Getter for the Ensembl-specific 'dataClass' term + * @return The data class Term + */ + public static ComparableTerm getDataClassTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("dataClass"); + } + + /** + * Getter for the Ensembl-specific 'organism' term + * @return The organism Term - "ORGANISM_TAG" + * added by xiujuan 2016-1-28 + */ + public static ComparableTerm getOrganismTerm(){ + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("organism"); + } + + /** + * @return The length + * added by xiujuan 2016-1-28 + */ + public static ComparableTerm getLengthTerm(){ + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("length"); + } + + /** + * for the ensembl file "DT" parse + * @return The Date + * added by xiujuan 2016-1-28 + */ + public static ComparableTerm getDateTerm(){ + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("date"); + } + } + + /** + * {@inheritDoc} + * A file is in EMBL format if its name contains the word 
eem or edat, or the first line matches + * the EMBL format for the ID line. + */ + public boolean canRead(File file) throws IOException { + if (readableFileNames.matcher(file.getName()).matches()) return true; + BufferedReader br = new BufferedReader(new FileReader(file)); + String firstLine = br.readLine(); + boolean readable = firstLine!=null && headerLine.matcher(firstLine).matches() && + (lp.matcher(firstLine.substring(3).trim()).matches() || + lpPre87.matcher(firstLine.substring(3).trim()).matches() + ); + br.close(); + return readable; + } + + /** + * {@inheritDoc} + * Always returns a DNA tokenizer. + */ + public SymbolTokenization guessSymbolTokenization(File file) throws IOException { + return RichSequence.IOTools.getDNAParser(); + } + + /** + * {@inheritDoc} + * A stream is in EMBL format if its first line matches the EMBL format for the ID line. + */ + public boolean canRead(BufferedInputStream stream) throws IOException { + stream.mark(2000); // some streams may not support this + BufferedReader br = new BufferedReader(new InputStreamReader(stream)); + String firstLine = br.readLine(); + boolean readable = firstLine!=null && headerLine.matcher(firstLine).matches() && + (lp.matcher(firstLine.substring(3).trim()).matches() || + lpPre87.matcher(firstLine.substring(3).trim()).matches() + ); + // don't close the reader as it'll close the stream too. + // br.close(); + stream.reset(); + return readable; + } + + /** + * {@inheritDoc} + * Always returns a DNA tokenizer. 
+ */ + public SymbolTokenization guessSymbolTokenization(BufferedInputStream stream) throws IOException { + return RichSequence.IOTools.getDNAParser(); + } + + /** + * {@inheritDoc} + */ + public boolean readSequence(BufferedReader reader, + SymbolTokenization symParser, + SeqIOListener listener) + throws IllegalSymbolException, IOException, ParseException { + if (!(listener instanceof RichSeqIOListener)) throw new IllegalArgumentException("Only accepting RichSeqIOListeners today"); + return this.readRichSequence(reader,symParser,(RichSeqIOListener)listener,null); + } + + /** + * {@inheritDoc} + */ + public boolean readRichSequence(BufferedReader reader, + SymbolTokenization symParser, + RichSeqIOListener rlistener, + Namespace ns) + throws IllegalSymbolException, IOException, ParseException { + tax = null; + organism = null; + accession = null; + boolean hasAnotherSequence = true; + //boolean hasInternalWhitespace = false; + + rlistener.startSequence(); + + if (ns==null) ns=RichObjectFactory.getDefaultNamespace(); + rlistener.setNamespace(ns); + + // Get an ordered list of key->value pairs in array-tuples + String sectionKey = null; + do { + List section = this.readSection(reader); + sectionKey = ((String[])section.get(0))[0]; + if(sectionKey == null){ + + String message = ParseException.newMessage(this.getClass(), accession, "No section key", "Not set", sectionToString(section)); + throw new ParseException(message); + } + // process section-by-section + if (sectionKey.equals(LOCUS_TAG)) { + // entryname dataclass; [circular] molecule; division; sequencelength BP. 
+ String loc = ((String[])section.get(0))[1]; + Matcher m = lp.matcher(loc); + Matcher mPre87 = lpPre87.matcher(loc); + if (m.matches()) { + // first token is both name and primary accession + rlistener.setName(m.group(1)); + rlistener.setAccession(m.group(1)); + // second token is version + rlistener.setVersion(Integer.parseInt(m.group(2))); + // third token is circular/linear + rlistener.setCircular(m.group(3).equals("circular")); + // fourth token is moltype + rlistener.addSequenceProperty(Terms.getMolTypeTerm(),m.group(4)); + // fifth token is data class + rlistener.addSequenceProperty(Terms.getDataClassTerm(),m.group(5)); + // sixth token is taxonomic division + rlistener.setDivision(m.group(6)); + // seventh token is sequence length, which is ignored + rlistener.addSequenceProperty(Terms.getLengthTerm(),m.group(7)); + // as it is calculated from the sequence data later. + } else if (mPre87.matches()) { + rlistener.setName(mPre87.group(1)); + if (mPre87.group(3)!=null) { + // add annotation for 'genomic' (Ensembl-specific term) + rlistener.addSequenceProperty(Terms.getGenomicTerm(),null); + } + rlistener.addSequenceProperty(Terms.getMolTypeTerm(),mPre87.group(4)); + rlistener.setDivision(mPre87.group(5)); + rlistener.addSequenceProperty(Terms.getLengthTerm(), mPre87.group(6)); + // Optional extras + String circular = mPre87.group(2); + if (circular!=null) rlistener.setCircular(true); + } else { + String message = ParseException.newMessage(this.getClass(),accession,"Not Set","Bad ID line found", sectionToString(section)); + throw new ParseException(message); + } + } else if (sectionKey.equals(DEFINITION_TAG)) { + rlistener.setDescription(((String[])section.get(0))[1]); + } else if (sectionKey.equals(SOURCE_TAG)) { + // only interested in organelle sub-tag + for (int i = 1; i < section.size(); i++) { + sectionKey = ((String[])section.get(i))[0]; + if (sectionKey.equals(ORGANELLE_TAG)) { + rlistener.addSequenceProperty(Terms.getOrganelleTerm(), 
((String[])section.get(i))[1].trim()); + break; // skip out of for loop once found + } + if(sectionKey.equals(ORGANISM_TAG)){ + rlistener.addSequenceProperty(Terms.getOrganismTerm(), ((String[])section.get(i))[1].trim()); + break; + } + } + } else if (sectionKey.equals(DATE_TAG)) { + String chunk = ((String[])section.get(0))[1].trim(); + Matcher dm = dp.matcher(chunk); + Matcher dm_ensembl = dp_ensembl.matcher(chunk); + if(dm_ensembl.matches()){ + String date = dm_ensembl.group(1); + rlistener.addSequenceProperty(Terms.getDateTerm(),date); + }else if (dm.matches()) { + String date = dm.group(1); + String rel = dm.group(3); + String type = dm.group(4); + if (type.equals("Created")) { + rlistener.addSequenceProperty(Terms.getDateCreatedTerm(), date); + rlistener.addSequenceProperty(Terms.getRelCreatedTerm(), rel); + } else if (type.equals("Last updated, Version ")) { + rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(), date); + rlistener.addSequenceProperty(Terms.getRelUpdatedTerm(), rel); + rlistener.addSequenceProperty(Terms.getRelUpdatedRecordVersionTerm(), dm.group(5)); + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad date type found",sectionToString(section)); + throw new ParseException(message); + } + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad date line found",sectionToString(section)); + throw new ParseException(message); + + } + } else if (sectionKey.equals(ACCESSION_TAG)) { + // if multiple accessions, store only first as accession, + // and store rest in annotation + String[] accs = ((String[])section.get(0))[1].split(";"); + accession = accs[0].trim(); + rlistener.setAccession(accession); + for (int i = 1; i < accs.length; i++) { + rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(),accs[i].trim()); + } + } else if (sectionKey.equals(VERSION_TAG)) { + String ver = ((String[])section.get(0))[1]; + /*Matcher m = vp.matcher(ver); + if 
(m.matches()) { + String verAcc = m.group(1); + if (!accession.equals(verAcc)) { + // the version refers to a different accession! + // believe the version line, and store the original + // accession away in the additional accession set + rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(),accession); + accession = verAcc; + rlistener.setAccession(accession); + } + rlistener.setVersion(Integer.parseInt(m.group(2))); + } else {*/ + rlistener.addSequenceProperty(Terms.getVersionLineTerm(),ver); + //} + } else if (sectionKey.equals(KEYWORDS_TAG)) { + String val = ((String[])section.get(0))[1]; + val = val.substring(0,val.length()-1); // chomp dot + val = val.replace('\n',' '); //remove newline + String[] kws = val.split(";"); + for (int i = 0; i < kws.length; i++) { + String kw = kws[i].trim(); + if (kw.length()==0) continue; + rlistener.addSequenceProperty(Terms.getKeywordTerm(), kw); + } + } else if (sectionKey.equals(DATABASE_XREF_TAG)) { + String val = ((String[])section.get(0))[1]; + val = val.substring(0,val.length()-1); // chomp dot + // database_identifier; primary_identifier; secondary_identifier.... 
+ String[] parts = val.split(";"); + // construct a DBXREF out of the dbname part[0] and accession part[1] + CrossRef crossRef = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{parts[0].trim(),parts[1].trim(), new Integer(0)}); + // assign remaining bits of info as annotations + for (int j = 2; j < parts.length; j++) { + Note note = new SimpleNote(Terms.getAdditionalAccessionTerm(),parts[j].trim(),j-1); + try { + crossRef.getRichAnnotation().addNote(note); + } catch (ChangeVetoException ce) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Could not annotate identifier terms",sectionToString(section)); + ParseException pe = new ParseException(message); + pe.initCause(ce); + throw pe; + } + } + RankedCrossRef rcrossRef = new SimpleRankedCrossRef(crossRef, 0); + rlistener.setRankedCrossRef(rcrossRef); + } else if (sectionKey.equals(REFERENCE_TAG) && !this.getElideReferences()) { + // first line of section has rank and location + String refrank = ((String[])section.get(0))[1]; + int ref_rank = Integer.parseInt(refrank.substring(1,refrank.length()-1)); + int ref_start = -999; + int ref_end = -999; + // rest can be in any order + String consortium = null; + String authors = ""; + String title = null; + String locator = null; + String pubmed = null; + String medline = null; + String doi = null; + String remark = null; + for (int i = 1; i < section.size(); i++) { + String key = ((String[])section.get(i))[0]; + String val = ((String[])section.get(i))[1]; + if (key.equals(AUTHORS_TAG)) { + if (val.endsWith(";")) val = val.substring(0,val.length()-1); // chomp semicolon + authors = val.replace('\n',' '); //see #2276 + } + if (key.equals(CONSORTIUM_TAG)) { + if (val.endsWith(";")) val = val.substring(0,val.length()-1); // chomp semicolon + consortium = val.replace('\n',' '); //see #2276 + } + if (key.equals(TITLE_TAG)) { + if (val.length()>1) { + if (val.endsWith(";")) val = val.substring(0,val.length()-1); // chomp 
semicolon + if (val.endsWith("\"")) val = val.substring(1,val.length()-1); // chomp quotes + title = val.replace('\n',' '); //see #2276 + } else title=null; // single semi-colon indicates no title + } + if (key.equals(LOCATOR_TAG)) { + if (val.endsWith(".")) val = val.substring(0,val.length()-1); // chomp dot + locator = val.replace('\n',' '); //see #2276 + } + if (key.equals(REFERENCE_XREF_TAG)) { + // database_identifier; primary_identifier. + String[] refs = val.split("\\.(\\s+|$)"); + for (int j = 0 ; j < refs.length; j++) { + if (refs[j].trim().length()==0) continue; + String[] parts = refs[j].split(";"); + String db = parts[0]; + String ref = parts[1].trim(); + if (db.equalsIgnoreCase(Terms.PUBMED_KEY)) pubmed = ref; + else if (db.equalsIgnoreCase(Terms.MEDLINE_KEY)) medline = ref; + else if (db.equalsIgnoreCase(Terms.DOI_KEY)) doi = ref; + } + } + if (key.equals(REMARK_TAG)) remark = val.replace('\n',' '); //see #2276 + if (key.equals(REFERENCE_POSITION_TAG)) { + // only the first group is taken + // if we have multiple lines, only the last line is taken + Matcher m = rpp.matcher(val); + if (m.matches()) { + ref_start = Integer.parseInt(m.group(1)); + if(m.group(2) != null) + ref_end = Integer.parseInt(m.group(3)); + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad reference line found",sectionToString(section)); + throw new ParseException(message); + } + } + } + // create the docref object + try { + List authSet = DocRefAuthor.Tools.parseAuthorString(authors); + if (consortium!=null) authSet.add(new SimpleDocRefAuthor(consortium, true, false)); + DocRef dr = (DocRef)RichObjectFactory.getObject(SimpleDocRef.class,new Object[]{authSet,locator,title}); + // assign either the pubmed or medline to the docref - medline gets priority, then pubmed, then doi + if (medline!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.MEDLINE_KEY, medline, new Integer(0)})); + else if 
(pubmed!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.PUBMED_KEY, pubmed, new Integer(0)})); + else if (doi!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.DOI_KEY, doi, new Integer(0)})); + // assign the remarks + if (!this.getElideComments()) dr.setRemark(remark); + // assign the docref to the bioentry + RankedDocRef rdr = new SimpleRankedDocRef(dr, + (ref_start != -999 ? new Integer(ref_start) : null), + (ref_end != -999 ? new Integer(ref_end) : null), + ref_rank); + rlistener.setRankedDocRef(rdr); + rlistener.setRankedDocRef(rdr); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + throw new ParseException(e, message); + } + } else if (sectionKey.equals(COMMENT_TAG) && !this.getElideComments()) { + // Set up some comments + rlistener.setComment(((String[])section.get(0))[1]); + } else if (sectionKey.equals(FEATURE_TAG) && !this.getElideFeatures()) { + // starting from second line of input, start a new feature whenever we come across + // a key that does not start with / + boolean seenAFeature = false; + int rcrossrefCount = 0; + for (int i = 1 ; i < section.size(); i++) { + String key = ((String[])section.get(i))[0]; + String val = ((String[])section.get(i))[1]; + if (key.startsWith("/")) { + key = key.substring(1); // strip leading slash + val = val.replaceAll("\\s*[\\n\\r]+\\s*","").trim(); + if (val.startsWith("\"")) val = val.substring(1,val.length()-1).trim(); // strip quotes + // parameter on old feature + if (key.equalsIgnoreCase("db_xref")) { + Matcher m = dbxp.matcher(val); + if (m.matches()) { + String dbname = m.group(1); + String raccession = m.group(2); + if (dbname.equalsIgnoreCase("taxon")) { + // Set the Taxon instead of a dbxref + tax = (NCBITaxon)RichObjectFactory.getObject(SimpleNCBITaxon.class, new Object[]{Integer.valueOf(raccession)}); + 
rlistener.setTaxon(tax); + try { + if (organism!=null) tax.addName(NCBITaxon.SCIENTIFIC,organism); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + throw new ParseException(e, message); + } + } else { + try { + CrossRef cr = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{dbname, raccession, new Integer(0)}); + RankedCrossRef rcr = new SimpleRankedCrossRef(cr, ++rcrossrefCount); + rlistener.getCurrentFeature().addRankedCrossRef(rcr); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + throw new ParseException(e, message); + } + } + } else { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad dbxref found",sectionToString(section)); + throw new ParseException(message); + } + } else if (key.equalsIgnoreCase("organism")) { + try { + organism = val; + if (tax!=null) tax.addName(NCBITaxon.SCIENTIFIC,organism); + } catch (ChangeVetoException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "",sectionToString(section)); + throw new ParseException(message); + } + } else { + if (key.equalsIgnoreCase("translation")) { + // strip spaces from sequence + val = val.replaceAll("\\s+",""); + } + rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(key),val); + } + } else { + // new feature! 
+ // end previous feature + if (seenAFeature) rlistener.endFeature(); + // start next one, with lots of lovely info in it + RichFeature.Template templ = new RichFeature.Template(); + templ.annotation = new SimpleRichAnnotation(); + templ.sourceTerm = Terms.getEMBLTerm(); + templ.typeTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(key); + templ.featureRelationshipSet = new TreeSet(); + templ.rankedCrossRefs = new TreeSet(); + String tidyLocStr = val.replaceAll("\\s+",""); + templ.location = GenbankLocationParser.parseLocation(ns, accession, tidyLocStr); + rlistener.startFeature(templ); + seenAFeature = true; + rcrossrefCount = 0; + } + } + if (seenAFeature) rlistener.endFeature(); + } /*else if (sectionKey.equals(START_SEQUENCE_TAG) && !this.getElideSymbols()) { + StringBuffer seq = new StringBuffer(); + for (int i = 0 ; i < section.size(); i++) seq.append(((String[])section.get(i))[1]); + try { + SymbolList sl = new SimpleSymbolList(symParser, + seq.toString().replaceAll("\\s+","").replaceAll("[\\.|~]","-")); + rlistener.addSymbols(symParser.getAlphabet(), + (Symbol[])(sl.toList().toArray(new Symbol[0])), + 0, sl.length()); + } catch (Exception e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad sequence",sectionToString(section)); + throw new ParseException(e, message); + } + }*/ + } while (!sectionKey.equals(END_SEQUENCE_TAG)); + + // Allows us to tolerate trailing whitespace without + // thinking that there is another Sequence to follow + while (true) { + reader.mark(1); + int c = reader.read(); + if (c == -1) { + hasAnotherSequence = false; + break; + } + if (Character.isWhitespace((char) c)) { + //hasInternalWhitespace = true; + continue; + } + //if (hasInternalWhitespace) + // System.err.println("Warning: whitespace found between sequence entries"); + reader.reset(); + break; + } + + // Finish up. 
+ rlistener.endSequence(); + return hasAnotherSequence; + } + + // reads an indented section, combining split lines and creating a list of key->value tuples + private List readSection(BufferedReader br) throws ParseException { + List section = new ArrayList(); + String line; + boolean done = false; + + // while not done + try { + while (!done) { + // mark buffer + br.mark(160); + // read token + line = br.readLine(); + if (line.length()<2) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Bad line found",line); + throw new ParseException(message); + } + String token = line.substring(0,2); + // READ SEQUENCE SECTION + if (token.equals(START_SEQUENCE_TAG)) { + // from next line, read sequence until // - leave // on stack + StringBuffer sb = new StringBuffer(); + while (!done) { + br.mark(160); + line = br.readLine(); + if (line.startsWith(END_SEQUENCE_TAG)) { + br.reset(); + done = true; + } else { + // create sequence tag->value pair to return, sans numbers + sb.append(line.replaceAll("\\d","")); + } + } + section.add(new String[]{START_SEQUENCE_TAG,sb.toString()}); + } + // READ FEATURE TABLE SECTION + else if (token.equals(FEATURE_HEADER_TAG)) { + // create dummy feature tag->value pair and add to return set + section.add(new String[]{FEATURE_TAG,null}); + // drop next FH line + line = br.readLine(); // skip next line too - it is also FH + // read all FT lines until XX + String currentTag = null; + StringBuffer currentVal = null; + while (!done) { + line = br.readLine(); + if (line.startsWith(DELIMITER_TAG)) { + done = true; + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + } else { + // FT lines: FT word value + // or FT /word + // or FT /db_xref="taxon:3899.... + // ......" 
+ line = line.substring(5); // chomp off "FT " + if (!line.startsWith(" ")) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // case 1 : word value - splits into key-value on its own + String[] parts = line.trim().split("\\s+"); + currentTag = parts[0]; + currentVal = new StringBuffer(); + currentVal.append(parts[1]); + } else { + line = line.trim(); + if (line.startsWith("/")) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // case 2 : /word[=.....] + currentVal = new StringBuffer(); + int equalIndex = line.indexOf('='); + if (equalIndex>=0) { + currentTag = line.substring(0, equalIndex); + currentVal.append(line.substring(equalIndex+1)); + } else { + currentTag = line; + } + } else { + // case 3 : ...." + currentVal.append("\n"); + currentVal.append(line); + } + } + } + } + } + // READ END OF SEQUENCE + else if (token.equals(END_SEQUENCE_TAG)) { + section.add(new String[]{END_SEQUENCE_TAG,null}); + done = true; + } + // READ DELIMITER TAG + else if (token.equals(DELIMITER_TAG)) { + section.add(new String[]{DELIMITER_TAG,null}); + done = true; + } + // READ THIRD PARTY ANNOTATION SECTION + else if (token.equals(TPA_TAG)) { + // exception = don't know how to do TPA yet + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Unable to handle TPAs just yet",sectionToString(section)); + throw new ParseException(message); + } + // READ CONTIG SECTION + //else if (token.equals(CONTIG_TAG)) { + // exception = don't know how to do contigs yet + //String message = ParseException.newMessage(this.getClass(),accession,"not set", "Unable to handle contig assemblies just yet",sectionToString(section)); + //throw new ParseException(message); + //} + //2016.1.27 modified by Xiujuan for parsing file, file containing CONTIG_TAG + else if (token.equals(CONTIG_TAG)) { + section.add(new String[]{CONTIG_TAG,null}); + 
done = true; + } + // READ DOCREF + else if (token.equals(DATABASE_XREF_TAG)) { + section.add(new String[]{DATABASE_XREF_TAG,line.substring(5).trim()}); + done = true; + } + // READ DATE + else if (token.equals(DATE_TAG)) { + section.add(new String[]{DATE_TAG,line.substring(5).trim()}); + done = true; + } + // READ NORMAL TAG/VALUE SECTION + else { + // rewind buffer to mark + br.reset(); + // read token/values until XX + String currentTag = null; + StringBuffer currentVal = null; + while (!done) { + line = br.readLine(); + if (line.startsWith(DELIMITER_TAG)) { + done = true; + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + } else { + try { + // merge neighbouring repeated tokens by concatting values + // return tag->value pairs + String tag = line.substring(0,2); + String value = line.substring(5); + if (currentTag==null || !tag.equals(currentTag)) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // start new tag + currentTag = tag; + currentVal = new StringBuffer(); + currentVal.append(value); + } else { + currentVal.append("\n"); + currentVal.append(value); + } + } catch (Exception e) { + String message = ParseException.newMessage(this.getClass(), accession, "not set","",sectionToString(section)); + throw new ParseException(e, message); + } + } + } + } + } + } catch (IOException e) { + String message = ParseException.newMessage(this.getClass(),accession,"not set", "Unable to handle TPAs just yet",sectionToString(section)); + throw new ParseException(message); + } + return section; + } + + /** + * {@inheritDoc} + */ + public void writeSequence(Sequence seq, PrintStream os) throws IOException { + if (this.getPrintStream()==null) this.setPrintStream(os); + this.writeSequence(seq, RichObjectFactory.getDefaultNamespace()); + } + + /** + * {@inheritDoc} + */ + public void writeSequence(Sequence seq, String format, PrintStream 
os) throws IOException { + if (this.getPrintStream()==null) this.setPrintStream(os); + this.writeSequence(seq, format, RichObjectFactory.getDefaultNamespace()); + } + + /** + * {@inheritDoc} + * Namespace is ignored as EMBL has no concept of it. + */ + public void writeSequence(Sequence seq, Namespace ns) throws IOException { + this.writeSequence(seq, this.getDefaultFormat(), ns); + } + + /** + * As per {@link #writeSequence(Sequence, Namespace)}, except + * that it also takes a format parameter. This can be any of the formats + * defined as constants in this class. + * @param seq see {@link #writeSequence(Sequence, Namespace)} + * @param format the format to use. + * @param ns see {@link #writeSequence(Sequence, Namespace)} + * @throws IOException see {@link #writeSequence(Sequence, Namespace)} + */ + public void writeSequence(Sequence seq, String format, Namespace ns) throws IOException { + if (!format.equals(EMBL_FORMAT) && !format.equals(EMBL_PRE87_FORMAT)) + throw new IllegalArgumentException("Format "+format+" not recognised."); + + RichSequence rs; + try { + if (seq instanceof RichSequence) rs = (RichSequence)seq; + else rs = RichSequence.Tools.enrich(seq); + } catch (ChangeVetoException e) { + IOException e2 = new IOException("Unable to enrich sequence"); + e2.initCause(e); + throw e2; + } + + SymbolTokenization tok; + try { + tok = rs.getAlphabet().getTokenization("token"); + } catch (Exception e) { + throw new RuntimeException("Unable to get alphabet tokenizer",e); + } + + Set notes = rs.getNoteSet(); + String accession = rs.getAccession(); + StringBuffer accessions = new StringBuffer(); + accessions.append(accession); + accessions.append(";"); + String cdat = null; + String udat = null; + String crel = null; + String urel = null; + String urecv = null; + String organelle = null; + String versionLine = null; + String dataClass = "STD"; + boolean genomic = false; + String moltype = rs.getAlphabet().getName(); + for (Iterator i = notes.iterator(); 
i.hasNext(); ) { + Note n = i.next(); + if (n.getTerm().equals(Terms.getDateCreatedTerm())) cdat=n.getValue(); + else if (n.getTerm().equals(Terms.getDateUpdatedTerm())) udat=n.getValue(); + else if (n.getTerm().equals(Terms.getRelCreatedTerm())) crel=n.getValue(); + else if (n.getTerm().equals(Terms.getRelUpdatedTerm())) urel=n.getValue(); + else if (n.getTerm().equals(Terms.getRelUpdatedRecordVersionTerm())) urecv=n.getValue(); + else if (n.getTerm().equals(Terms.getMolTypeTerm())) moltype=n.getValue(); + else if (n.getTerm().equals(Terms.getVersionLineTerm())) versionLine=n.getValue(); + else if (n.getTerm().equals(Terms.getGenomicTerm())) genomic = true; + else if (n.getTerm().equals(Terms.getDataClassTerm())) dataClass = n.getValue(); + else if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) { + accessions.append(" "); + accessions.append(n.getValue()); + accessions.append(";"); + } else if (n.getTerm().equals(Terms.getOrganelleTerm())) organelle=n.getValue(); + } + + StringBuffer locusLine = new StringBuffer(); + // Division cannot be null + String div = rs.getDivision(); + if(div==null || div.length()==0 || div.length()>3) + div = "UNC"; //Unclassified + + if (format.equals(EMBL_FORMAT)) { + // accession; SV version; circular/linear; moltype; dataclass; division; length BP. + locusLine.append(rs.getAccession()); + locusLine.append("; SV "); + locusLine.append(rs.getVersion()); + locusLine.append("; "); + locusLine.append(rs.getCircular()?"circular":"linear"); + locusLine.append("; "); + locusLine.append(moltype); + locusLine.append("; "); + locusLine.append(dataClass); + locusLine.append("; "); + locusLine.append(div); + locusLine.append("; "); + locusLine.append(rs.length()); + locusLine.append(" BP."); + } else if (format.equals(EMBL_PRE87_FORMAT)) { + // entryname dataclass; [circular] molecule; division; sequencelength BP. 
+ locusLine.append(StringTools.rightPad(rs.getName(), 9)); + locusLine.append(" standard; "); + locusLine.append(rs.getCircular()?"circular ":""); + // if it is Ensembl genomic, add that in too + if (genomic==true) locusLine.append("genomic "); + locusLine.append(moltype); + locusLine.append("; "); + locusLine.append(div); + locusLine.append("; "); + locusLine.append(rs.length()); + locusLine.append(" BP."); + } + StringTools.writeKeyValueLine(LOCUS_TAG, locusLine.toString(), 5, this.getLineWidth(), null, LOCUS_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // accession line + StringTools.writeKeyValueLine(ACCESSION_TAG, accessions.toString(), 5, this.getLineWidth(), null, ACCESSION_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // version line + if (format.equals(EMBL_PRE87_FORMAT)) { + if (versionLine!=null) StringTools.writeKeyValueLine(VERSION_TAG, versionLine, 5, this.getLineWidth(), null, VERSION_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(VERSION_TAG, accession+"."+rs.getVersion(), 5, this.getLineWidth(), null, VERSION_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // date line + StringTools.writeKeyValueLine(DATE_TAG, (cdat==null?udat:cdat)+" (Rel. "+(crel==null?"0":crel)+", Created)", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(DATE_TAG, udat+" (Rel. 
"+(urel==null?"0":urel)+", Last updated, Version "+(urecv==null?"0":urecv)+")", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // definition line + StringTools.writeKeyValueLine(DEFINITION_TAG, rs.getDescription(), 5, this.getLineWidth(), null, DEFINITION_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + + // keywords line + StringBuffer keywords = new StringBuffer(); + for (Iterator n = notes.iterator(); n.hasNext(); ) { + Note nt = n.next(); + if (nt.getTerm().equals(Terms.getKeywordTerm())) { + if (keywords.length()>0) keywords.append("; "); + keywords.append(nt.getValue()); + } + } + if (keywords.length()>0) { + keywords.append("."); + StringTools.writeKeyValueLine(KEYWORDS_TAG, keywords.toString(), 5, this.getLineWidth(), null, KEYWORDS_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } else { + this.getPrintStream().println(KEYWORDS_TAG+" ."); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // source line (from taxon) + // organism line + NCBITaxon tax = rs.getTaxon(); + if (tax!=null) { + StringTools.writeKeyValueLine(SOURCE_TAG, tax.getDisplayName(), 5, this.getLineWidth(), null, SOURCE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(ORGANISM_TAG, tax.getNameHierarchy(), 5, this.getLineWidth(), null, SOURCE_TAG, this.getPrintStream()); + if (organelle!=null) StringTools.writeKeyValueLine(ORGANELLE_TAG, organelle, 5, this.getLineWidth(), null, ORGANELLE_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // references - rank (bases x to y) + for (Iterator r = rs.getRankedDocRefs().iterator(); r.hasNext(); ) { + RankedDocRef rdr = r.next(); + DocRef d = rdr.getDocumentReference(); + // RN, RC, RP, RX, RG, RA, RT, RL + StringTools.writeKeyValueLine(REFERENCE_TAG, "["+rdr.getRank()+"]", 5, this.getLineWidth(), null, REFERENCE_TAG, this.getPrintStream()); + 
StringTools.writeKeyValueLine(REMARK_TAG, d.getRemark(), 5, this.getLineWidth(), null, REMARK_TAG, this.getPrintStream()); + Integer rstart = rdr.getStart(); + if (rstart==null) rstart = new Integer(1); + Integer rend = rdr.getEnd(); + if (rend==null) rend = new Integer(rs.length()); + StringTools.writeKeyValueLine(REFERENCE_POSITION_TAG, rstart+"-"+rend, 5, this.getLineWidth(), null, REFERENCE_POSITION_TAG, this.getPrintStream()); + CrossRef c = d.getCrossref(); + if (c!=null) StringTools.writeKeyValueLine(REFERENCE_XREF_TAG, c.getDbname()+"; "+c.getAccession()+".", 5, this.getLineWidth(), null, REFERENCE_XREF_TAG, this.getPrintStream()); + List auths = d.getAuthorList(); + for (Iterator j = auths.iterator(); j.hasNext(); ) { + DocRefAuthor a = j.next(); + if (a.isConsortium()) { + StringTools.writeKeyValueLine(CONSORTIUM_TAG, a+";", 5, this.getLineWidth(), null, CONSORTIUM_TAG, this.getPrintStream()); + j.remove(); + } + } + if (!auths.isEmpty()) StringTools.writeKeyValueLine(AUTHORS_TAG, DocRefAuthor.Tools.generateAuthorString(auths, true)+";", 5, this.getLineWidth(), null, AUTHORS_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(AUTHORS_TAG, ";", 5, this.getLineWidth(), null, AUTHORS_TAG, this.getPrintStream()); + if (d.getTitle()!=null && d.getTitle().length()!=0) StringTools.writeKeyValueLine(TITLE_TAG, "\""+d.getTitle()+"\";", 5, this.getLineWidth(), null, TITLE_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(TITLE_TAG, ";", 5, this.getLineWidth(), null, TITLE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(LOCATOR_TAG, d.getLocation()+".", 5, this.getLineWidth(), null, LOCATOR_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + // db references - ranked + for (Iterator r = rs.getRankedCrossRefs().iterator(); r.hasNext(); ) { + RankedCrossRef rcr = r.next(); + CrossRef c = rcr.getCrossRef(); + Set noteset = c.getNoteSet(); + StringBuffer sb = new StringBuffer(); + 
sb.append(c.getDbname()); + sb.append("; "); + sb.append(c.getAccession()); + boolean hasSecondary = false; + for (Iterator i = noteset.iterator(); i.hasNext(); ) { + Note n = i.next(); + if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) { + sb.append("; "); + sb.append(n.getValue()); + hasSecondary = true; + } + } + //if (!hasSecondary) sb.append("; -"); + //sb.append("."); + if (!hasSecondary) sb.append(";"); + else sb.append("."); + StringTools.writeKeyValueLine(DATABASE_XREF_TAG, sb.toString(), 5, this.getLineWidth(), null, DATABASE_XREF_TAG, this.getPrintStream()); + } + if (!rs.getRankedCrossRefs().isEmpty()) + this.getPrintStream().println(DELIMITER_TAG+" "); + + // comments - if any + if (!rs.getComments().isEmpty()) { + StringBuffer sb = new StringBuffer(); + for (Iterator i = rs.getComments().iterator(); i.hasNext(); ) { + Comment c = i.next(); + sb.append(c.getComment()); + if (i.hasNext()) sb.append("\n"); + } + StringTools.writeKeyValueLine(COMMENT_TAG, sb.toString(), 5, this.getLineWidth(), null, COMMENT_TAG, this.getPrintStream()); + this.getPrintStream().println(DELIMITER_TAG+" "); + } + + this.getPrintStream().println(FEATURE_HEADER_TAG+" Key Location/Qualifiers"); + this.getPrintStream().println(FEATURE_HEADER_TAG+" "); + // feature_type location + for (Iterator i = rs.getFeatureSet().iterator(); i.hasNext(); ) { + RichFeature f = (RichFeature)i.next(); + StringTools.writeKeyValueLine(FEATURE_TAG+" "+f.getTypeTerm().getName(), GenbankLocationParser.writeLocation((RichLocation)f.getLocation()), 21, this.getLineWidth(), ",", FEATURE_TAG, this.getPrintStream()); + for (Iterator j = f.getNoteSet().iterator(); j.hasNext(); ) { + Note n = j.next(); + // /key="val" or just /key if val=="" + if (n.getValue()==null || n.getValue().length()==0) StringTools.writeKeyValueLine(FEATURE_TAG, "/"+n.getTerm().getName(), 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + else StringTools.writeKeyValueLine(FEATURE_TAG, 
"/"+n.getTerm().getName()+"=\""+n.getValue()+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + } + // add-in to source feature only organism and db_xref="taxon:xyz" where present + if (f.getType().equals("source") && tax!=null) { + String displayName = tax.getDisplayName(); + if (displayName.indexOf('(')>-1) displayName = displayName.substring(0, displayName.indexOf('(')).trim(); + StringTools.writeKeyValueLine(FEATURE_TAG, "/organism=\""+displayName+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + StringTools.writeKeyValueLine(FEATURE_TAG, "/db_xref=\"taxon:"+tax.getNCBITaxID()+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + } + // add-in other dbxrefs where present + for (Iterator j = f.getRankedCrossRefs().iterator(); j.hasNext(); ) { + RankedCrossRef rcr = j.next(); + CrossRef cr = rcr.getCrossRef(); + StringTools.writeKeyValueLine(FEATURE_TAG, "/db_xref=\""+cr.getDbname()+":"+cr.getAccession()+"\"", 21, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream()); + } + } + this.getPrintStream().println(DELIMITER_TAG+" "); + + // SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other; + int aCount = 0; + int cCount = 0; + int gCount = 0; + int tCount = 0; + int oCount = 0; + for (int i = 1; i <= rs.length(); i++) { + char c; + try { + c = tok.tokenizeSymbol(rs.symbolAt(i)).charAt(0); + } catch (Exception e) { + throw new RuntimeException("Unable to get symbol at position "+i,e); + } + switch (c) { + case 'a': case 'A': + aCount++; + break; + case 'c': case 'C': + cCount++; + break; + case 'g': case 'G': + gCount++; + break; + case 't': case 'T': + tCount++; + break; + default: + oCount++; + } + } + this.getPrintStream().print(START_SEQUENCE_TAG+" Sequence "+rs.length()+" BP; "); + this.getPrintStream().print(aCount + " A; "); + this.getPrintStream().print(cCount + " C; "); + this.getPrintStream().print(gCount + " G; "); + this.getPrintStream().print(tCount + " T; "); + 
this.getPrintStream().println(oCount + " other;"); + + // sequence stuff + Symbol[] syms = (Symbol[])rs.toList().toArray(new Symbol[0]); + int lineLen = 0; + int symCount = 0; + this.getPrintStream().print(" "); + for (int i = 0; i < syms.length; i++) { + if (symCount % 60 == 0 && symCount>0) { + this.getPrintStream().print(StringTools.leftPad(""+symCount,10)); + this.getPrintStream().print("\n "); + lineLen = 0; + } + if (symCount % 10 == 0) { + this.getPrintStream().print(" "); + lineLen++; + } + try { + this.getPrintStream().print(tok.tokenizeSymbol(syms[i])); + } catch (IllegalSymbolException e) { + throw new RuntimeException("Found illegal symbol: "+syms[i]); + } + symCount++; + lineLen++; + } + this.getPrintStream().print(StringTools.leftPad(""+symCount,(66-lineLen)+10)); + this.getPrintStream().print("\n"); + this.getPrintStream().println(END_SEQUENCE_TAG); + } + + /** + * {@inheritDoc} + */ + public String getDefaultFormat() { + return EMBL_FORMAT; + } + + + /** + * Converts the current parse section to a String. Useful for debugging. 
+ */ + String sectionToString(List section){ + StringBuffer parseBlock = new StringBuffer(); + for(Iterator i = section.listIterator(); i.hasNext();){ + String[] part = (String[])i.next(); + for(int x = 0; x < part.length; x++){ + parseBlock.append(part[x]); + if(x == 0){ + parseBlock.append(" "); //the gap will have been trimmed + } + } + } + return parseBlock.toString(); + } +} diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomIOTools.java b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomIOTools.java index afed93a..0e1c65b 100644 --- a/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomIOTools.java +++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomIOTools.java @@ -1,6 +1,5 @@ package cn.piflow.bundle.microorganism.util; - import org.biojava.bio.BioError; import org.biojava.bio.BioException; import org.biojava.bio.seq.*; @@ -662,10 +661,19 @@ public interface CustomIOTools { * @return a RichSequenceIterator over each sequence in the * fasta file */ + public static RichSequenceIterator readEMBLDNA(BufferedReader br, + Namespace ns) { + return new RichStreamReader(br, new CustomEMBLFormat(), getDNAParser(), + factory, ns); + } - - + //parse Ensembl file + public static RichSequenceIterator readEnsembl(BufferedReader br, + Namespace ns) { + return new RichStreamReader(br, new CustomEnsemblFormat(), getDNAParser(), + factory, ns); + } /** * Iterate over the sequences in an EMBL-format stream of RNA sequences. @@ -753,7 +761,11 @@ public interface CustomIOTools { * @return a RichSequenceIterator over each sequence in the * fasta file */ - + public static RichSequenceIterator readUniProt(BufferedReader br, + Namespace ns) { + return new RichStreamReader(br, new CustomUniProtFormat(), + getProteinParser(), factory, ns); + } /** * Read a UniProt XML file using a custom type of SymbolList. 
For diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomUniProtFormat.java b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomUniProtFormat.java new file mode 100644 index 0000000..5478a5e --- /dev/null +++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/CustomUniProtFormat.java @@ -0,0 +1,1291 @@ +package cn.piflow.bundle.microorganism.util; + +import org.biojava.bio.proteomics.MassCalc; +import org.biojava.bio.seq.Sequence; +import org.biojava.bio.seq.io.ParseException; +import org.biojava.bio.seq.io.SeqIOListener; +import org.biojava.bio.seq.io.SymbolTokenization; +import org.biojava.bio.symbol.*; +import org.biojava.ontology.Term; +import org.biojava.utils.ChangeVetoException; +import org.biojavax.*; +import org.biojavax.bio.seq.RichFeature; +import org.biojavax.bio.seq.RichLocation; +import org.biojavax.bio.seq.RichSequence; +import org.biojavax.bio.seq.io.RichSeqIOListener; +import org.biojavax.bio.seq.io.RichSequenceFormat; +import org.biojavax.bio.seq.io.UniProtCommentParser; +import org.biojavax.bio.seq.io.UniProtLocationParser; +import org.biojavax.bio.taxa.NCBITaxon; +import org.biojavax.bio.taxa.SimpleNCBITaxon; +import org.biojavax.ontology.ComparableTerm; +import org.biojavax.utils.CRC64Checksum; +import org.biojavax.utils.StringTools; + +import java.io.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by xiujuan on 2016/5/11. + */ +public class CustomUniProtFormat extends RichSequenceFormat.HeaderlessFormat{ + + + // Register this format with the format auto-guesser. 
+ static { + RichSequence.IOTools.registerFormat(CustomUniProtFormat.class); + } + + /** + * The name of this format + */ + public static final String UNIPROT_FORMAT = "UniProt"; + + private static final String SUBFORMAT_UNIPROT = "UniProt"; + private static final String SUBFORMAT_IPI = "IPI"; + + protected static final String LOCUS_TAG = "ID"; + protected static final String ACCESSION_TAG = "AC"; + protected static final String DEFINITION_TAG = "DE"; + protected static final String DATE_TAG = "DT"; + protected static final String SOURCE_TAG = "OS"; + protected static final String ORGANELLE_TAG = "OG"; + protected static final String ORGANISM_TAG = "OC"; + protected static final String TAXON_TAG = "OX"; + protected static final String GENE_TAG = "GN"; + protected static final String DATABASE_XREF_TAG = "DR"; + protected static final String PROTEIN_EXIST_TAG = "PE"; + protected static final String REFERENCE_TAG = "RN"; + protected static final String RP_LINE_TAG = "RP"; + protected static final String REFERENCE_XREF_TAG = "RX"; + protected static final String AUTHORS_TAG = "RA"; + protected static final String CONSORTIUM_TAG = "RG"; + protected static final String TITLE_TAG = "RT"; + protected static final String LOCATION_TAG = "RL"; + protected static final String RC_LINE_TAG = "RC"; + protected static final String KEYWORDS_TAG = "KW"; + protected static final String COMMENT_TAG = "CC"; + protected static final String FEATURE_TAG = "FT"; + protected static final String START_SEQUENCE_TAG = "SQ"; + protected static final String END_SEQUENCE_TAG = "//"; + protected static final String ORGANISM_HOST_TAG = "OH"; + + // locus line for uniprot format + protected static final Pattern lp_uniprot = Pattern.compile("^((\\S+)_(\\S+))\\s+(\\S+);\\s+(PRT)?;?\\s*\\d+\\s+AA\\.$"); + // locus line for IPI format + protected static final Pattern lp_ipi = Pattern.compile("^((\\S+)\\.(\\d+))\\s+(IPI);\\s+(PRT)?;?\\s*\\d+\\s+AA\\.$"); + // RP line parser + protected static final 
Pattern rppat = Pattern.compile("SEQUENCE OF (\\d+)-(\\d+)"); + // date lineDT for uniprot + // date, integrated into UniProtKB/database_name. + // date, sequence version x. + // date, entry version x. + protected static final Pattern dp_uniprot = Pattern.compile("([^,]+),([^\\d\\.]+)(\\d+)?\\.$"); + // date lineDT for IPI + // date (xxx, Created) + // date (xxx, Last sequence update) + protected static final Pattern dp_ipi = Pattern.compile("([^\\(]+)\\(([^,]+),([^\\)]+)\\)$"); + // feature line + protected static final Pattern fp = Pattern.compile("^\\s*([\\d?<]+\\s+[\\d?>]+)(\\s+(.*))?$"); + + protected static final Pattern headerLine = Pattern.compile("^ID.*"); + + /** + * Implements some UniProt-specific terms. + */ + public static class Terms extends RichSequence.Terms { + private static String GENENAME_KEY = "Name"; + private static String GENESYNONYM_KEY = "Synonyms"; + private static String ORDLOCNAME_KEY = "OrderedLocusNames"; + private static String ORFNAME_KEY = "ORFNames"; + + /** + * Getter for the UniProt term + * @return The UniProt Term + */ + public static ComparableTerm getUniProtTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("UniProt"); + } + + /** + * Getter for the UniProt combined database term + * @return The combined database for UniProt Term + */ + public static ComparableTerm getUniProtDBNameTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("UniProt database name"); + } + + /** + * Getter for the protein exists term + * @return The protein exists Term + */ + public static ComparableTerm getProteinExistsTerm() { + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("UniProt protein exists"); + } + + public static ComparableTerm getOrganismHostTerm(){ + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("Organism host"); + } + + public static ComparableTerm getSequenceMetaInfoTerm(){ + return RichObjectFactory.getDefaultOntology().getOrCreateTerm("Sequence meta info"); + 
} + } + + /** + * {@inheritDoc} + * A file is in UniProt format if the first line matches the UniProt format for the ID line. + */ + public boolean canRead(File file) throws IOException { + BufferedReader br = new BufferedReader(new FileReader(file)); + String firstLine = br.readLine(); + boolean readable = firstLine!=null && headerLine.matcher(firstLine).matches() && + (lp_uniprot.matcher(firstLine.substring(3).trim()).matches() || + lp_ipi.matcher(firstLine.substring(3).trim()).matches()); + br.close(); + return readable; + } + + /** + * {@inheritDoc} + * Always returns a protein tokenizer. + */ + public SymbolTokenization guessSymbolTokenization(File file) throws IOException { + return RichSequence.IOTools.getProteinParser(); + } + + /** + * {@inheritDoc} + * A stream is in UniProt format if the first line matches the UniProt format for the ID line. + */ + public boolean canRead(BufferedInputStream stream) throws IOException { + stream.mark(2000); // some streams may not support this + BufferedReader br = new BufferedReader(new InputStreamReader(stream)); + String firstLine = br.readLine(); + boolean readable = firstLine!=null && headerLine.matcher(firstLine).matches() && + (lp_uniprot.matcher(firstLine.substring(3).trim()).matches() + || lp_ipi.matcher(firstLine.substring(3).trim()).matches()); + // don't close the reader as it'll close the stream too. + // br.close(); + stream.reset(); + return readable; + } + + /** + * {@inheritDoc} + * Always returns a protein tokenizer. 
+ */ + public SymbolTokenization guessSymbolTokenization(BufferedInputStream stream) throws IOException { + return RichSequence.IOTools.getProteinParser(); + } + + /** + * {@inheritDoc} + */ + public boolean readSequence(BufferedReader reader, + SymbolTokenization symParser, + SeqIOListener listener) + throws IllegalSymbolException, IOException, ParseException { + if (!(listener instanceof RichSeqIOListener)) throw new IllegalArgumentException("Only accepting RichSeqIOListeners today"); + return this.readRichSequence(reader,symParser,(RichSeqIOListener)listener,null); + } + + private String accession = null; + + /** + * {@inheritDoc} + */ + public boolean readRichSequence(BufferedReader reader, + SymbolTokenization symParser, + RichSeqIOListener rlistener, + Namespace ns) + throws IllegalSymbolException, IOException, ParseException { + + boolean hasAnotherSequence = true; + //boolean hasInternalWhitespace = false; + + String subformat = SUBFORMAT_UNIPROT; + + rlistener.startSequence(); + + if (ns==null) ns=RichObjectFactory.getDefaultNamespace(); + rlistener.setNamespace(ns); + + // Get an ordered list of key->value pairs in array-tuples + String sectionKey = null; + NCBITaxon tax = null; + accession = null; + List section = null; + try{ + do { + + section = this.readSection(reader); + sectionKey = ((String[])section.get(0))[0]; + if(sectionKey == null){ + String message = ParseException.newMessage(this.getClass(),accession, "", "Section key was null", sectionToString(section)); + throw new ParseException(message); + } + // process section-by-section + if (sectionKey.equals(LOCUS_TAG)) { + // entryname dataclass; moltype; sequencelength AA. 
+ String loc = ((String[])section.get(0))[1]; + Matcher m = lp_uniprot.matcher(loc); + if (m.matches()) { + rlistener.setName(m.group(2)); + rlistener.setDivision(m.group(3)); + if (m.groupCount() > 4){ + rlistener.addSequenceProperty(Terms.getDataClassTerm(),m.group(4)); + rlistener.addSequenceProperty(Terms.getMolTypeTerm(),m.group(5)); + }else{ + rlistener.addSequenceProperty(Terms.getDataClassTerm(), m.group(4)); + rlistener.addSequenceProperty(Terms.getMolTypeTerm(), ""); + } + } else { + m = lp_ipi.matcher(loc); + if (m.matches()) { + subformat = SUBFORMAT_IPI; + rlistener.setName(m.group(2)); + rlistener.setVersion(Integer.parseInt(m.group(3))); + rlistener.addSequenceProperty(Terms.getDataClassTerm(), m.group(4)); + rlistener.addSequenceProperty(Terms.getMolTypeTerm(),m.group(5)); + } else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad ID line", sectionToString(section)); + throw new ParseException(message); + } + } + } else if (sectionKey.equals(DEFINITION_TAG)) { + String val = ((String[])section.get(0))[1]; + if (val.endsWith(".")) val = val.substring(0, val.length()-1); // chomp dot + rlistener.setDescription(val); + } else if (sectionKey.equals(SOURCE_TAG)) { + // use SOURCE_TAG and TAXON_TAG values + String sciname = null; + String comname = null; + List synonym = new ArrayList(); + List lineage = new ArrayList(); + int taxid = 0; + for (int i = 0; i < section.size(); i++) { + String tag = ((String[])section.get(i))[0]; + String value = ((String[])section.get(i))[1].trim(); + value = value.replace("\n", " "); + value = value.replace("\r\n", " "); + + if (tag.equals(SOURCE_TAG)) { + if (value.endsWith(".")) value = value.substring(0,value.length()-1); // chomp trailing dot + String[] parts = value.split("\\("); + sciname = parts[0].trim(); + if (parts.length>1) { + comname = parts[1].trim(); + if (comname.endsWith(")")) comname = comname.substring(0,comname.length()-1); // chomp trailing bracket + if (parts.length>2) 
{ + // synonyms + for (int j = 2 ; j < parts.length; j++) { + String syn = parts[j].trim(); + if (syn.endsWith(")")) syn = syn.substring(0,syn.length()-1); // chomp trailing bracket + synonym.add(syn); + } + } + } + } else if (tag.equals(TAXON_TAG)) { + String[] parts = value.split(";"); + for (int j = 0; j < parts.length; j++) { + String[] bits = parts[j].split("="); + if (bits[0].equals("NCBI_TaxID")) { + String[] morebits = bits[1].split(","); + taxid = Integer.parseInt(morebits[0].split(" ")[0].trim()); + } + } + } else if (tag.equals(ORGANELLE_TAG)) { + if (value.endsWith(".")) value = value.substring(0,value.length()-1); // chomp trailing dot + String[] parts = value.split(";"); + for (int j = 0; j < parts.length; j++) { + parts[j]=parts[j].trim(); + rlistener.addSequenceProperty(Terms.getOrganelleTerm(),parts[j]); + } + } + //added by xiujuan 2016.5.12 + else if(tag.equals(ORGANISM_TAG)){ + if (value.endsWith(".")) value = value.substring(0,value.length()-1); // chomp trailing dot + String[] parts = value.split(";"); + for (int j = 0; j < parts.length; j++) { + parts[j]=parts[j].trim(); + lineage.add(parts[j]); + } + }else if(tag.equals(ORGANISM_HOST_TAG)) { //"OH"tag Organism Host + String[] parts = value.split("\\. 
"); + for(int j = 0; j < parts.length; j++){ + rlistener.addSequenceProperty(Terms.getOrganismHostTerm(),parts[j]); + } + } + } + // Set the Taxon + tax = (NCBITaxon)RichObjectFactory.getObject(SimpleNCBITaxon.class, new Object[]{new Integer(taxid)}); + rlistener.setTaxon(tax); + try { + if (sciname!=null) tax.addName(NCBITaxon.SCIENTIFIC,sciname); + if (comname!=null) tax.addName(NCBITaxon.COMMON,comname); + for (Iterator j = synonym.iterator(); j.hasNext(); ) tax.addName(NCBITaxon.SYNONYM, (String)j.next()); + for(Iterator j = lineage.iterator();j.hasNext();)tax.addName("lineage",(String)j.next()); + } catch (ChangeVetoException e) { + throw new ParseException(e); + } + } else if (sectionKey.equals(DATE_TAG)) { + String chunk = ((String[])section.get(0))[1]; + if(subformat.equals(SUBFORMAT_UNIPROT)) { + Matcher dm = dp_uniprot.matcher(chunk); + if (dm.matches()) { + String date = dm.group(1).trim(); + String type = dm.group(2).trim(); + String rel = dm.group(3); + if (rel!=null) rel = rel.trim(); + if (type.startsWith("integrated into UniProtKB")) { + String dbname = type.split("/")[1]; + rlistener.addSequenceProperty(Terms.getDateCreatedTerm(), date); + rlistener.addSequenceProperty(Terms.getUniProtDBNameTerm(), dbname); + } else if (type.equalsIgnoreCase("sequence version")) { + if (rel==null){ + String message = ParseException.newMessage(this.getClass(),accession, "", "Version missing for "+type, sectionToString(section)); + throw new ParseException(message); + } + rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(), date); + rlistener.setVersion(Integer.parseInt(rel)); + } else if (type.equalsIgnoreCase("entry version")) { + if (rel==null) { + String message = ParseException.newMessage(this.getClass(),accession, "", "Version missing for "+type, sectionToString(section)); + throw new ParseException(message); + } + rlistener.addSequenceProperty(Terms.getDateAnnotatedTerm(), date); + rlistener.addSequenceProperty(Terms.getRelAnnotatedTerm(), rel); + } else 
{ + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad date type "+type, sectionToString(section)); + throw new ParseException(message); + } + } else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad date line", sectionToString(section)); + throw new ParseException(message); + } + } else if(subformat.equals(SUBFORMAT_IPI)) { + Matcher dm = dp_ipi.matcher(chunk); + if (dm.matches()) { + String date = dm.group(1).trim(); + String type = dm.group(3).trim(); + if(type.equals("Created")) { + rlistener.addSequenceProperty(Terms.getDateCreatedTerm(), date); + } else if(type.equals("Last sequence update")) { + rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(), date); + } else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad date type "+type, sectionToString(section)); + throw new ParseException(message); + } + } else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad date line", sectionToString(section)); + throw new ParseException(message); + } + } else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Unknown date line format", sectionToString(section)); + throw new ParseException(message); + } + } else if (sectionKey.equals(ACCESSION_TAG)) { + // if multiple accessions, store only first as accession, + // and store rest in annotation + String[] accs = ((String[])section.get(0))[1].split(";"); + if(accs.length>0) accession = accs[0].trim(); else accession = ""; + rlistener.setAccession(accession); + for (int i = 1; i < accs.length; i++) { + rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(),accs[i].trim()); + } + } else if (sectionKey.equals(PROTEIN_EXIST_TAG)) { + String val = ((String[])section.get(0))[1]; + if (val.endsWith(";")) val = val.substring(0, val.length()-1); // chomp semicolon + rlistener.addSequenceProperty(Terms.getProteinExistsTerm(),val.trim()); + } else if 
(sectionKey.equals(KEYWORDS_TAG)) { + String val = ((String[])section.get(0))[1]; + if (val.endsWith(".")) val = val.substring(0, val.length()-1); // chomp dot + val = val.replace('\n',' '); //remove newline + String[] kws = val.split(";"); + for (int i = 0; i < kws.length; i++) { + String kw = kws[i].trim(); + if (kw.length()==0) continue; + rlistener.addSequenceProperty(Terms.getKeywordTerm(), kw); + } + } else if (sectionKey.equals(GENE_TAG)) { + String[] genes = ((String[])section.get(0))[1].split("\\s+(or|and)\\s+"); + for (int geneID = 0; geneID < genes.length; geneID++) { + String[] parts = genes[geneID].replace('\n', ' ').split(";"); + for (int j = 0; j < parts.length; j++) { + if(parts[j].matches(".+=.+")){ + String[] moreparts = parts[j].split("="); + String[] values = moreparts[1].split(","); + // nasty hack - we really should have notes on the gene object itself... if such a thing existed... + if (moreparts[0].trim().equals(Terms.GENENAME_KEY)) rlistener.addSequenceProperty(Terms.getGeneNameTerm(),geneID+":"+values[0].trim()); + else if (moreparts[0].trim().equals(Terms.GENESYNONYM_KEY)) { + for (int k = 0; k < values.length; k++) rlistener.addSequenceProperty(Terms.getGeneSynonymTerm(),geneID+":"+values[k].trim()); + } else if (moreparts[0].trim().equals(Terms.ORDLOCNAME_KEY)) { + for (int k = 0; k < values.length; k++) rlistener.addSequenceProperty(Terms.getOrderedLocusNameTerm(),geneID+":"+values[k].trim()); + } else if (moreparts[0].trim().equals(Terms.ORFNAME_KEY)) { + for (int k = 0; k < values.length; k++) rlistener.addSequenceProperty(Terms.getORFNameTerm(),geneID+":"+values[k].trim()); + } + } + } + } + } else if (sectionKey.equals(DATABASE_XREF_TAG)) { + // database_identifier; primary_identifier; secondary_identifier.... 
+ String val = ((String[])section.get(0))[1]; + if (val.endsWith(".")) val = val.substring(0, val.length()-1); // chomp dot + String[] parts = val.split(";"); + // construct a DBXREF out of the dbname part[0] and accession part[1] + String dbname = parts[0].trim(); + String acc = parts[1].trim(); + CrossRef crossRef = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{dbname,acc,new Integer(0)}); + // assign remaining bits of info as additional accession annotations + for (int j = 2; j < parts.length; j++) { + ComparableTerm t = (ComparableTerm) Terms.getAdditionalAccessionTerm(); + Note note = new SimpleNote(t,parts[j].trim(),j-1); + try { + crossRef.getRichAnnotation().addNote(note); + } catch (ChangeVetoException ce) { + ParseException pe = new ParseException("Could not annotate additional accession terms"); + pe.initCause(ce); + throw pe; + } + } + RankedCrossRef rcrossRef = new SimpleRankedCrossRef(crossRef, 0); + rlistener.setRankedCrossRef(rcrossRef); + } else if (sectionKey.equals(REFERENCE_TAG) && !this.getElideReferences()) { + // first line of section has rank and location + String refrank = ((String[])section.get(0))[1]; + refrank = refrank.trim().split(" ")[0]; + int ref_rank = Integer.parseInt(refrank.substring(1,refrank.length()-1)); + // rest can be in any order + String authors = null; + String consortium = null; + String title = null; + String locator = null; + String pubmed = null; + String medline = null; + String doi = null; + String remark = null; + Integer rstart = null; + Integer rend = null; + for (int i = 1; i < section.size(); i++) { + String key = ((String[])section.get(i))[0]; + String val = ((String[])section.get(i))[1]; + //System.err.println(key+": "+val); + if (key.equals(AUTHORS_TAG)) { + if (val.endsWith(";")) val = val.substring(0, val.length()-1); // chomp semicolon + authors = val.replace('\n',' '); //see #2276 + } + if (key.equals(CONSORTIUM_TAG)) { + if (val.endsWith(";")) val = val.substring(0, 
val.length()-1); // chomp semicolon + consortium = val.replace('\n',' '); //see #2276 + } + if (key.equals(TITLE_TAG)) { + if (val.endsWith(";")) val = val.substring(0, val.length()-1); // chomp semicolon + if (val.endsWith("\"")) val = val.substring(1, val.length()-1); // chomp quotes + title = val.replace('\n',' '); //see #2276 + } + if (key.equals(LOCATION_TAG)) { + if (val.endsWith(".")) val = val.substring(0, val.length()-1); // chomp dot + locator = val.replace('\n',' '); //see #2276 + } + if (key.equals(REFERENCE_XREF_TAG)) { + // database_identifier=primary_identifier; + String[] refs = val.split(";"); + for (int j = 0 ; j < refs.length; j++) { + if (refs[j].trim().length()==0) continue; + String[] parts = refs[j].split("="); + if ( parts.length <2) { + // some DOI lines look like this and are causing problems: + //DOI=10.1002/(SICI)1097-0215(19990702)82:1<137::AID-IJC23>3.0.CO;2-F;ignoring + System.err.println("warning: problems while parsing: " + val); + continue; + } + String db = parts[0].trim(); + String ref = parts[1].trim(); + if (db.equalsIgnoreCase(Terms.PUBMED_KEY)) pubmed = ref; + else if (db.equalsIgnoreCase(Terms.MEDLINE_KEY)) medline = ref; + else if (db.equalsIgnoreCase(Terms.DOI_KEY)) doi = ref; + } + } + if (key.equals(RP_LINE_TAG)) { + if (val.endsWith(".")) val = val.substring(0, val.length()-1); // chomp dot + remark = val.replace('\n',' '); //see #2276 + // Try to use it to find the location of the reference, if we have one. + Matcher m = rppat.matcher(val); + if (m.matches()) { + rstart = Integer.valueOf(m.group(1)); + rend = Integer.valueOf(m.group(2)); + } + } + if (key.equals(RC_LINE_TAG)) { + // Split into key=value pairs separated by semicolons and terminated with semicolon. 
+ String[] parts = val.split(";"); + for (int j = 0; j < parts.length; j++) { + String[] subparts = parts[j].split("="); + // get term for first section + String termName = subparts[0].trim(); + Term t; + if (termName.equalsIgnoreCase(Terms.SPECIES_KEY)) t = Terms.getSpeciesTerm(); + else if (termName.equalsIgnoreCase(Terms.STRAIN_KEY)) t = Terms.getStrainTerm(); + else if (termName.equalsIgnoreCase(Terms.TISSUE_KEY)) t = Terms.getTissueTerm(); + else if (termName.equalsIgnoreCase(Terms.TRANSPOSON_KEY)) t = Terms.getTransposonTerm(); + else if (termName.equalsIgnoreCase(Terms.PLASMID_KEY)) t = Terms.getPlasmidTerm(); + else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Invalid RC term found: "+termName, sectionToString(section)); + throw new ParseException(message); + } + // assign notes using term and rank:second section as value + // nasty hack - we really should have notes on the reference itself. + rlistener.addSequenceProperty("docref_" + t.toString(), ref_rank+":"+subparts[1].trim()); + } + } + } + + // create the docref object + try { + List auths = null; + if(authors != null) auths = DocRefAuthor.Tools.parseAuthorString(authors); + if (consortium!=null){ + if(auths == null) auths = new ArrayList(); + auths.add(new SimpleDocRefAuthor(consortium,true,false)); + } + DocRef dr = (DocRef)RichObjectFactory.getObject(SimpleDocRef.class,new Object[]{auths,locator,title}); + + //save all Crossref to the sequence property + if (medline!=null) rlistener.addSequenceProperty("docref_"+"medline", ref_rank+":"+medline); + if (pubmed!=null) rlistener.addSequenceProperty("docref_"+"pubmed", ref_rank+":"+pubmed); + if (doi!=null) rlistener.addSequenceProperty("docref_"+"doi", ref_rank+":"+doi); + // assign either the pubmed or medline to the docref - medline gets priority, then pubmed, then doi +// if (medline!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.MEDLINE_KEY, medline, new 
Integer(0)})); +// else if (pubmed!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.PUBMED_KEY, pubmed, new Integer(0)})); +// else if (doi!=null) dr.setCrossref((CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.DOI_KEY, doi, new Integer(0)})); + // assign the remarks + if (!this.getElideComments()) dr.setRemark(remark); + // assign the docref to the bioentry + RankedDocRef rdr = new SimpleRankedDocRef(dr,rstart,rend,ref_rank); + rlistener.setRankedDocRef(rdr); + } catch (ChangeVetoException e) { + throw new ParseException(e); + } + } else if (sectionKey.equals(COMMENT_TAG) && !this.getElideComments()) { + // Set up some comments + String val = ((String[])section.get(0))[1]; + if (UniProtCommentParser.isParseable(val)) rlistener.setComment(val); + else { + // copyright message + rlistener.addSequenceProperty(Terms.getCopyrightTerm(), val); + } + } else if (sectionKey.equals(FEATURE_TAG) && !this.getElideFeatures()) { + // starting from second line of input, start a new feature whenever we come across + // a key that does not start with / + boolean seenAFeature = false; + for (int i = 1 ; i < section.size(); i++) { + String key = ((String[])section.get(i))[0]; + String val = ((String[])section.get(i))[1]; + val = val.replaceAll("\\s*[\\n\\r]+\\s*", " ").trim(); + if (val.endsWith(".")) val = val.substring(0,val.length()-1); // chomp dot + if (key.startsWith("/")) { + key = key.substring(1); // strip leading slash + if (key.equals("FTId")) rlistener.addFeatureProperty(Terms.getFTIdTerm(),val); + else { + // should never happen - but here just in case + rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(key),val); + } + } else { + // new feature! 
+ // end previous feature + if (seenAFeature) rlistener.endFeature(); + // start next one, with lots of lovely info in it + RichFeature.Template templ = new RichFeature.Template(); + templ.annotation = new SimpleRichAnnotation(); + templ.sourceTerm = Terms.getUniProtTerm(); + templ.typeTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(key); + templ.featureRelationshipSet = new TreeSet(); + templ.rankedCrossRefs = new TreeSet(); + String desc = null; + Matcher m = fp.matcher(val); + if (m.matches()) { + String loc = m.group(1); + desc = m.group(3); + templ.location = UniProtLocationParser.parseLocation(loc); + } else { + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad feature value: "+val, sectionToString(section)); + throw new ParseException(message); + } + rlistener.startFeature(templ); + if (desc!=null && desc.length()>0) rlistener.addFeatureProperty(Terms.getFeatureDescTerm(),desc); + seenAFeature = true; + } + } + if (seenAFeature) rlistener.endFeature(); + } else if (sectionKey.equals(START_SEQUENCE_TAG) && !this.getElideSymbols()) { + StringBuffer seq = new StringBuffer(); + + for (int i = 0 ; i < section.size()-1; i++) seq.append(((String[])section.get(i))[1]); + String seqMetaInfo = ((String[])section.get(section.size()-1))[1]; + rlistener.addSequenceProperty(Terms.getSequenceMetaInfoTerm(), seqMetaInfo); + //section size greater than 1? 
+ try { + SymbolList sl = new SimpleSymbolList(symParser, + seq.toString().replaceAll("\\s+","").replaceAll("[\\.|~]","-")); + rlistener.addSymbols(symParser.getAlphabet(), + (Symbol[])(sl.toList().toArray(new Symbol[0])), + 0, sl.length()); + } catch (IllegalAlphabetException e) { + String message = ParseException.newMessage(this.getClass(),accession, "", "", sectionToString(section)); + throw new ParseException(e, message); + } + } + } while (!sectionKey.equals(END_SEQUENCE_TAG)); + }catch (RuntimeException e){ + String message = ParseException.newMessage(this.getClass(),accession, "", "", sectionToString(section)); + throw new ParseException(e, message); + } + + // Allows us to tolerate trailing whitespace without + // thinking that there is another Sequence to follow + while (true) { + reader.mark(1); + int c = reader.read(); + if (c == -1) { + hasAnotherSequence = false; + break; + } + if (Character.isWhitespace((char) c)) { + //hasInternalWhitespace = true; + continue; + } + //if (hasInternalWhitespace) + //System.err.println("Warning: whitespace found between sequence entries"); + reader.reset(); + break; + } + + // Finish up. 
+ rlistener.endSequence(); + return hasAnotherSequence; + } + + // reads an indented section, combining split lines and creating a list of key->value tuples + private List readSection(BufferedReader br) throws ParseException { + List section = new ArrayList(); + String line; + boolean done = false; + + // while not done + try { + while (!done) { + // mark buffer + br.mark(320); + // read token + line = br.readLine(); + if (line.length()<2) { + String message = ParseException.newMessage(this.getClass(),accession, "", "Bad line found: "+line, sectionToString(section)); + throw new ParseException(message); + } + String token = line.substring(0,2); + // READ SEQUENCE SECTION + if (token.equals(START_SEQUENCE_TAG)) { + // from next line, read sequence until // - leave // on stack + StringBuffer sb = new StringBuffer(); + String sequence_meta_info = line.substring(5); + while (!done) { + br.mark(160); + line = br.readLine(); + if (line.startsWith(END_SEQUENCE_TAG)) { + br.reset(); + done = true; + } else { + // create sequence tag->value pair to return, sans numbers + sb.append(line); + } + } + section.add(new String[]{START_SEQUENCE_TAG,sb.toString()}); + section.add(new String[]{"Sequence_Meta_Info", sequence_meta_info}); + } + // READ COMMENT SECTION + else if (token.equals(COMMENT_TAG)) { + // read from first line till next that begins with "CC -!-" + StringBuffer currentVal = new StringBuffer(); + boolean wasMisc = false; + if (!line.startsWith(COMMENT_TAG+" -!-")) wasMisc = true; + currentVal.append(line.substring(5)); + while (!done) { + br.mark(160); + line = br.readLine(); + if (((!wasMisc) && line.charAt(5)!=' ') || !line.startsWith("C") || line.startsWith(COMMENT_TAG+" -!-")) { + br.reset(); + done = true; + // dump current tag if exists + section.add(new String[]{COMMENT_TAG,currentVal.toString()}); + } else { + currentVal.append("\n"); + currentVal.append(line.substring(5)); + } + } + } + // READ FEATURE TABLE SECTION + else if (token.equals(FEATURE_TAG)) { 
+ br.reset(); + // read all FT lines until first non-FT starting line + String currentTag = null; + StringBuffer currentVal = new StringBuffer(); + section.add(new String[]{FEATURE_TAG,null}); + while (!done) { + br.mark(160); + line = br.readLine(); + if (!line.startsWith(FEATURE_TAG)) { + br.reset(); + done = true; + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + } else { + // FT lines: FT KEY_NAME x x description + // or: FT .... + // or FT /FTId=899. + line = line.substring(5); // chomp off "FT " + if (!line.startsWith(" ")) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // case 1 : word value - splits into key-value based on first 8 chars + currentTag = line.substring(0,8).trim(); + currentVal = new StringBuffer(); + currentVal.append(line.substring(8).trim()); + } else { + line = line.trim(); + if (line.startsWith("/") && line.indexOf("=") != -1) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // case 3 : /word=..... + currentVal = new StringBuffer(); + int equalIndex = line.indexOf('='); + if (equalIndex>=0) { + currentTag = line.substring(0, equalIndex); + currentVal.append(line.substring(equalIndex+1)); + } else { + currentTag = line; + } + } else { + // case 2 : ...." 
+ currentVal.append("\n"); + currentVal.append(line); + } + } + } + } + } + // READ DOCREF + else if (token.equals(DATABASE_XREF_TAG)) { + section.add(new String[]{DATABASE_XREF_TAG,line.substring(5).trim()}); + done = true; + } + // READ DATE + else if (token.equals(DATE_TAG)) { + section.add(new String[]{DATE_TAG,line.substring(5).trim()}); + done = true; + } + // READ END OF SEQUENCE + else if (token.equals(END_SEQUENCE_TAG)) { + section.add(new String[]{END_SEQUENCE_TAG,null}); + done = true; + } + // READ NORMAL TAG/VALUE SECTION + else { + // rewind buffer to mark + br.reset(); + // read token/values until first with non-same first character + // exceptions: DE/DT, and RN...RN + String currentTag = null; + char currentTagStart = '\0'; + StringBuffer currentVal = null; + while (!done) { + br.mark(320); + line = br.readLine(); + if (currentTagStart=='\0') currentTagStart = line.charAt(0); + if (!line.startsWith(""+currentTagStart) || + (currentTagStart=='D' && currentTag!=null && !line.startsWith(""+currentTag)) || + (currentTagStart=='R' && currentTag!=null && line.startsWith("RN"))) { + br.reset(); + done = true; + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + } else { + try { + // merge neighbouring repeated tokens by concatting values + // return tag->value pairs + String tag = line.substring(0,2); + String value = line.substring(5); + if (currentTag==null || !tag.equals(currentTag)) { + // dump current tag if exists + if (currentTag!=null) section.add(new String[]{currentTag,currentVal.toString()}); + // start new tag + currentTag = tag; + currentVal = new StringBuffer(); + currentVal.append(value); + } else { + currentVal.append("\n"); + currentVal.append(value); + } + } catch (Exception e) { + String message = ParseException.newMessage(this.getClass(),accession, "", "", sectionToString(section)); + throw new ParseException(e, message); + } + } + } + } + } + } catch (IOException e) { + 
            // Wrap low-level I/O / runtime failures with parse context (accession + the
            // partially-read section) so the caller can see where parsing stopped.
            String message = ParseException.newMessage(this.getClass(),accession, "", "", sectionToString(section));
            throw new ParseException(e, message);
        } catch (RuntimeException e){
            String message = ParseException.newMessage(this.getClass(),accession, "", "", sectionToString(section));
            throw new ParseException(e, message);
        }
        return section;
    }

    /**
     * {@inheritDoc}
     * Convenience overload: binds the stream (first call only) and delegates to the
     * namespace-based writer using the default namespace.
     */
    public void writeSequence(Sequence seq, PrintStream os) throws IOException {
        if (this.getPrintStream()==null) this.setPrintStream(os);
        this.writeSequence(seq, RichObjectFactory.getDefaultNamespace());
    }

    /**
     * {@inheritDoc}
     * As above, but rejects any format name other than this writer's default.
     * @throws IllegalArgumentException if {@code format} is not the default format.
     */
    public void writeSequence(Sequence seq, String format, PrintStream os) throws IOException {
        if (this.getPrintStream()==null) this.setPrintStream(os);
        if (!format.equals(this.getDefaultFormat())) throw new IllegalArgumentException("Unknown format: "+format);
        this.writeSequence(seq, RichObjectFactory.getDefaultNamespace());
    }

    /**
     * {@inheritDoc}
     * Namespace is ignored as UniProt has no concept of it.
     */
    // NOTE(review): this block was transcribed from a whitespace-mangled patch; runs of
    // spaces inside string literals (column padding of the flat-file output) may have been
    // collapsed — verify padding against the UniProt flat-file specification.
    public void writeSequence(Sequence seq, Namespace ns) throws IOException {
        // Upgrade the plain Sequence to a RichSequence so notes, taxon and refs are reachable.
        RichSequence rs;
        try {
            if (seq instanceof RichSequence) rs = (RichSequence)seq;
            else rs = RichSequence.Tools.enrich(seq);
        } catch (ChangeVetoException e) {
            IOException e2 = new IOException("Unable to enrich sequence");
            e2.initCause(e);
            throw e2;
        }

        SymbolTokenization tok;
        try {
            tok = rs.getAlphabet().getTokenization("token");
        } catch (Exception e) {
            throw new RuntimeException("Unable to get alphabet tokenizer",e);
        }

        // First pass over the note set: pull out the header-level fields (dates, db name,
        // data class, copyright, accessions) and bucket the rank-tagged note values
        // (species/strain/tissue/transposon/plasmid and the gene-name families) by rank.
        Set notes = rs.getNoteSet();
        String accession = rs.getAccession();
        StringBuffer accessions = new StringBuffer();
        accessions.append(accession);
        accessions.append(";");
        String cdat = null;
        String udat = null;
        String adat = null;
        String dbname = "?";
        String arel = null;
        String organelle = null;
        String protExists = null;
        String dataclass = "STANDARD";
        String copyright = null;
        Map speciesRecs = new TreeMap();
        Map strainRecs = new TreeMap();
        Map tissueRecs = new TreeMap();
        Map transpRecs = new TreeMap();
        Map plasmidRecs = new TreeMap();
        Map genenames = new TreeMap();
        Map genesynonyms = new TreeMap();
        Map orfnames = new TreeMap();
        Map ordlocnames = new TreeMap();
        for (Iterator i = notes.iterator(); i.hasNext(); ) {
            Note n = i.next();
            if (n.getTerm().equals(Terms.getDateCreatedTerm())) cdat=n.getValue();
            else if (n.getTerm().equals(Terms.getDateUpdatedTerm())) udat=n.getValue();
            else if (n.getTerm().equals(Terms.getDateAnnotatedTerm())) adat=n.getValue();
            else if (n.getTerm().equals(Terms.getUniProtDBNameTerm())) dbname=n.getValue();
            else if (n.getTerm().equals(Terms.getProteinExistsTerm())) protExists=n.getValue();
            else if (n.getTerm().equals(Terms.getRelAnnotatedTerm())) arel=n.getValue();
            else if (n.getTerm().equals(Terms.getDataClassTerm())) dataclass = n.getValue();
            else if (n.getTerm().equals(Terms.getCopyrightTerm())) copyright = n.getValue();
            else if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) {
                accessions.append(" ");
                accessions.append(n.getValue());
                accessions.append(";");
            } else if (n.getTerm().equals(Terms.getOrganelleTerm())) organelle = (organelle==null?"":organelle+"; ")+n.getValue();
            // use the nasty hack to split the reference rank away from the actual value in this field
            else if (n.getTerm().equals(Terms.getGeneNameTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                genenames.put(refID, ref.substring(colon+1)); // map of id -> string as only one name per gene
            } else if (n.getTerm().equals(Terms.getGeneSynonymTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (genesynonyms.get(refID)==null) genesynonyms.put(refID, new ArrayList());
                ((List)genesynonyms.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getOrderedLocusNameTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (ordlocnames.get(refID)==null) ordlocnames.put(refID, new ArrayList());
                ((List)ordlocnames.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getORFNameTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (orfnames.get(refID)==null) orfnames.put(refID, new ArrayList());
                ((List)orfnames.get(refID)).add(ref.substring(colon+1));
            }
            // use the nasty hack to split the reference rank away from the actual value in this field
            // we'll end up with a bunch in key 0 for those which did not come from us. We ignore these for now.
            else if (n.getTerm().equals(Terms.getSpeciesTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (speciesRecs.get(refID)==null) speciesRecs.put(refID, new ArrayList());
                ((List)speciesRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getStrainTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (strainRecs.get(refID)==null) strainRecs.put(refID, new ArrayList());
                ((List)strainRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getTissueTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (tissueRecs.get(refID)==null) tissueRecs.put(refID, new ArrayList());
                ((List)tissueRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getTransposonTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (transpRecs.get(refID)==null) transpRecs.put(refID, new ArrayList());
                ((List)transpRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getPlasmidTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (plasmidRecs.get(refID)==null) plasmidRecs.put(refID, new ArrayList());
                ((List)plasmidRecs.get(refID)).add(ref.substring(colon+1));
            }
        }

        // entryname dataclass; [circular] molecule; division; sequencelength BP.
        StringBuffer locusLine = new StringBuffer();
        locusLine.append(StringTools.rightPad(rs.getName()+"_"+rs.getDivision(),12));
        locusLine.append(" ");
        locusLine.append(StringTools.leftPad(dataclass,19));
        //locusLine.append("; PRT; "); //Uniprot no longer uses the PRT;
        locusLine.append("; ");
        locusLine.append(StringTools.leftPad(""+rs.length(),11));
        locusLine.append(" AA.");
        StringTools.writeKeyValueLine(LOCUS_TAG, locusLine.toString(), 5, this.getLineWidth(), null, LOCUS_TAG, this.getPrintStream());

        // accession line
        StringTools.writeKeyValueLine(ACCESSION_TAG, accessions.toString(), 5, this.getLineWidth(), null, ACCESSION_TAG, this.getPrintStream());

        // date lines: creation falls back to update date, entry version falls back to "0"
        StringTools.writeKeyValueLine(DATE_TAG, (cdat==null?udat:cdat)+", integrated into UniProtKB/"+dbname+".", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream());
        StringTools.writeKeyValueLine(DATE_TAG, udat+", sequence version "+rs.getVersion()+".", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream());
        StringTools.writeKeyValueLine(DATE_TAG, (adat==null?udat:adat)+", entry version "+(arel==null?"0":arel)+".", 5, this.getLineWidth(), null, DATE_TAG, this.getPrintStream());

        // definition line
        StringTools.writeKeyValueLine(DEFINITION_TAG, rs.getDescription()+".", 5, this.getLineWidth(), null, DEFINITION_TAG, this.getPrintStream());

        // gene line: one GN block per gene id, joined with "and" separator lines
        for (Iterator i = genenames.keySet().iterator(); i.hasNext(); ) {
            Integer geneid = (Integer)i.next();
            String genename = (String)genenames.get(geneid);
            List synonyms = (List)genesynonyms.get(geneid);
            List orfs = (List)orfnames.get(geneid);
            List ordlocs = (List)ordlocnames.get(geneid);

            StringBuffer gnline = new StringBuffer();
            gnline.append(Terms.GENENAME_KEY);
            gnline.append("=");
            gnline.append(genename);
            gnline.append("; ");

            if (synonyms!=null) {
                gnline.append(Terms.GENESYNONYM_KEY);
                gnline.append("=");
                for (Iterator j = synonyms.iterator(); j.hasNext(); ) {
                    gnline.append((String)j.next());
                    if (j.hasNext()) gnline.append(", ");
                }
                gnline.append("; ");
            }
            if (ordlocs!=null) {
                gnline.append(Terms.ORDLOCNAME_KEY);
                gnline.append("=");
                for (Iterator j = ordlocs.iterator(); j.hasNext(); ) {
                    gnline.append((String)j.next());
                    if (j.hasNext()) gnline.append(", ");
                }
                gnline.append("; ");
            }
            if (orfs!=null) {
                gnline.append(Terms.ORFNAME_KEY);
                gnline.append("=");
                for (Iterator j = orfs.iterator(); j.hasNext(); ) {
                    gnline.append((String)j.next());
                    if (j.hasNext()) gnline.append(", ");
                }
                gnline.append("; ");
            }

            StringTools.writeKeyValueLine(GENE_TAG, gnline.toString(), 5, this.getLineWidth(), null, GENE_TAG, this.getPrintStream());

            if (i.hasNext()) StringTools.writeKeyValueLine(GENE_TAG, "and", 5, this.getLineWidth(), null, GENE_TAG, this.getPrintStream());
        }

        // source line (from taxon)
        // organism line
        NCBITaxon tax = rs.getTaxon();
        if (tax!=null) {
            StringBuffer source = new StringBuffer();
            source.append(tax.getDisplayName());
            for (Iterator j = tax.getNames(NCBITaxon.SYNONYM).iterator(); j.hasNext(); ) {
                source.append(" (");
                source.append((String)j.next());
                source.append(")");
            }
            source.append(".");
            StringTools.writeKeyValueLine(SOURCE_TAG, source.toString(), 5, this.getLineWidth(), null, SOURCE_TAG, this.getPrintStream());
            if (organelle!=null) StringTools.writeKeyValueLine(ORGANELLE_TAG, organelle+".", 5, this.getLineWidth(), null, ORGANELLE_TAG, this.getPrintStream());
            StringTools.writeKeyValueLine(ORGANISM_TAG, tax.getNameHierarchy(), 5, this.getLineWidth(), null, ORGANISM_TAG, this.getPrintStream());
            // NOTE(review): unlike the sibling calls above, this uses the short
            // writeKeyValueLine overload (no wrap-prefix arguments) — confirm intended.
            StringTools.writeKeyValueLine(TAXON_TAG, "NCBI_TaxID="+tax.getNCBITaxID()+";", 5, this.getLineWidth(), this.getPrintStream());
        }

        // references - rank (bases x to y)
        for (Iterator r = rs.getRankedDocRefs().iterator(); r.hasNext(); ) {
            RankedDocRef rdr = r.next();
            DocRef d = rdr.getDocumentReference();
            // RN, RP, RC, RX, RG, RA, RT, RL
            StringTools.writeKeyValueLine(REFERENCE_TAG, "["+rdr.getRank()+"]", 5, this.getLineWidth(), null, REFERENCE_TAG, this.getPrintStream());
            if (d.getRemark()!=null)
                StringTools.writeKeyValueLine(RP_LINE_TAG, d.getRemark()+".", 5, this.getLineWidth(), null, RP_LINE_TAG, this.getPrintStream());
            // Print out ref position if present
            if (rdr.getStart()!=null && rdr.getEnd()!=null && d.getRemark()!=null && !rppat.matcher(d.getRemark()).matches()) StringTools.writeKeyValueLine(RP_LINE_TAG, "SEQUENCE OF "+rdr.getStart()+"-"+rdr.getEnd()+".", 5, this.getLineWidth(), null, RP_LINE_TAG, this.getPrintStream());
            // RC lines: emit each rank-bucketed qualifier list gathered in the first pass
            StringBuffer rcline = new StringBuffer();
            Integer rank = new Integer(rdr.getRank());
            if (speciesRecs.get(rank)!=null) {
                rcline.append(Terms.SPECIES_KEY);
                rcline.append("=");
                for (Iterator i = ((List)speciesRecs.get(rank)).iterator(); i.hasNext(); ) {
                    rcline.append((String)i.next());
                    if (i.hasNext()) rcline.append(", ");
                }
                rcline.append("; ");
            }
            if (strainRecs.get(rank)!=null) {
                rcline.append(Terms.STRAIN_KEY);
                rcline.append("=");
                for (Iterator i = ((List)strainRecs.get(rank)).iterator(); i.hasNext(); ) {
                    rcline.append((String)i.next());
                    if (i.hasNext()) rcline.append(", ");
                }
                rcline.append("; ");
            }
            if (tissueRecs.get(rank)!=null) {
                rcline.append(Terms.TISSUE_KEY);
                rcline.append("=");
                for (Iterator i = ((List)tissueRecs.get(rank)).iterator(); i.hasNext(); ) {
                    rcline.append((String)i.next());
                    if (i.hasNext()) rcline.append(", ");
                }
                rcline.append("; ");
            }
            if (transpRecs.get(rank)!=null) {
                rcline.append(Terms.TRANSPOSON_KEY);
                rcline.append("=");
                for (Iterator i = ((List)transpRecs.get(rank)).iterator(); i.hasNext(); ) {
                    rcline.append((String)i.next());
                    if (i.hasNext()) rcline.append(", ");
                }
                rcline.append("; ");
            }
            if (plasmidRecs.get(rank)!=null) {
                rcline.append(Terms.PLASMID_KEY);
                rcline.append("=");
                for (Iterator i = ((List)plasmidRecs.get(rank)).iterator(); i.hasNext(); ) {
                    rcline.append((String)i.next());
                    if (i.hasNext()) rcline.append(", ");
                }
                rcline.append("; ");
            }
            // print the rcline
            if (rcline.length()>0) StringTools.writeKeyValueLine(RC_LINE_TAG, rcline.toString(), 5, this.getLineWidth(), null, RC_LINE_TAG, this.getPrintStream());
            // Deal with RX and rest
            CrossRef c = d.getCrossref();
            if (c!=null) StringTools.writeKeyValueLine(REFERENCE_XREF_TAG, c.getDbname()+"="+c.getAccession()+";", 5, this.getLineWidth(), null, REFERENCE_XREF_TAG, this.getPrintStream());
            List auths = d.getAuthorList();
            // Consortium authors go on their own RG line and are removed from the RA list.
            for (Iterator j = auths.iterator(); j.hasNext(); ) {
                DocRefAuthor a = j.next();
                if (a.isConsortium()) {
                    StringTools.writeKeyValueLine(CONSORTIUM_TAG, a.getName()+";", 5, this.getLineWidth(), null, CONSORTIUM_TAG, this.getPrintStream());
                    j.remove();
                }
            }
            if (!auths.isEmpty()) StringTools.writeKeyValueLine(AUTHORS_TAG, DocRefAuthor.Tools.generateAuthorString(auths, false)+";", 5, this.getLineWidth(), null, AUTHORS_TAG, this.getPrintStream());
            if (d.getTitle()!=null && d.getTitle().length()!=0) StringTools.writeKeyValueLine(TITLE_TAG, "\""+d.getTitle()+"\";", 5, this.getLineWidth(), null, TITLE_TAG, this.getPrintStream());
            StringTools.writeKeyValueLine(LOCATION_TAG, d.getLocation()+".", 5, this.getLineWidth(), null, LOCATION_TAG, this.getPrintStream());
        }

        // comments - if any
        if (!rs.getComments().isEmpty()) {
            for (Iterator i = rs.getComments().iterator(); i.hasNext(); ) {
                Comment c = i.next();
                String text = c.getComment().trim();
                // NOTE(review): both branches of this conditional are identical, so the "-!-"
                // test is dead code; upstream BioJava treats non-"-!-" comments differently —
                // confirm whether a prefix was meant to be added in the else branch.
                if (text.length()>3 && text.substring(0,3).equals("-!-")) StringTools.writeKeyValueLine(COMMENT_TAG, text, 5, this.getLineWidth(), null, COMMENT_TAG, this.getPrintStream());
                else StringTools.writeKeyValueLine(COMMENT_TAG, text, 5, this.getLineWidth(), null, COMMENT_TAG, this.getPrintStream());
            }
        }

        // copyright - if any
        if (copyright!=null)
            StringTools.writeKeyValueLine(COMMENT_TAG, copyright, 5, this.getLineWidth(), null, COMMENT_TAG, this.getPrintStream());

        // db references - ranked; secondary accessions come from the cross-ref's note set,
        // "-" is written when there are none.
        for (Iterator r = rs.getRankedCrossRefs().iterator(); r.hasNext(); ) {
            RankedCrossRef rcr = r.next();
            CrossRef c = rcr.getCrossRef();
            Set noteset = c.getNoteSet();
            StringBuffer sb = new StringBuffer();
            sb.append(c.getDbname());
            sb.append("; ");
            sb.append(c.getAccession());
            boolean hasSecondary = false;
            for (Iterator i = noteset.iterator(); i.hasNext(); ) {
                Note n = i.next();
                if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) {
                    sb.append("; ");
                    sb.append(n.getValue());
                    hasSecondary = true;
                }
            }
            if (!hasSecondary) sb.append("; -");
            sb.append(".");
            StringTools.writeKeyValueLine(DATABASE_XREF_TAG, sb.toString(), 5, this.getLineWidth(), null, DATABASE_XREF_TAG, this.getPrintStream());
        }

        // protein exists line
        if (protExists!=null) {
            StringTools.writeKeyValueLine(PROTEIN_EXIST_TAG, protExists+";", 5, this.getLineWidth(), null, PROTEIN_EXIST_TAG, this.getPrintStream());
        }

        // keywords line: all keyword notes joined with "; "
        String keywords = null;
        for (Iterator n = notes.iterator(); n.hasNext(); ) {
            Note nt = n.next();
            if (nt.getTerm().equals(Terms.getKeywordTerm())) {
                if (keywords==null) keywords = nt.getValue();
                else keywords = keywords+"; "+nt.getValue();
            }
        }
        if (keywords!=null) {
            StringTools.writeKeyValueLine(KEYWORDS_TAG, keywords+".", 5, this.getLineWidth(), null, KEYWORDS_TAG, this.getPrintStream());
        }

        // feature_type location
        for (Iterator i = rs.getFeatureSet().iterator(); i.hasNext(); ) {
            RichFeature f = (RichFeature)i.next();
            String desc = "";
            String ftid = null;
            for (Iterator j = f.getNoteSet().iterator(); j.hasNext(); ) {
                Note n = j.next();
                if (n.getTerm().equals(Terms.getFTIdTerm())) ftid = n.getValue();
                else if (n.getTerm().equals(Terms.getFeatureDescTerm())) desc = n.getValue();
            }
            String kw = f.getTypeTerm().getName();
            String leader = StringTools.rightPad(kw,8)+" "+UniProtLocationParser.writeLocation((RichLocation)f.getLocation());
            if(desc.length()==0)
                this.getPrintStream().println(FEATURE_TAG+" "+leader); //see #2277
            else
                StringTools.writeKeyValueLine(FEATURE_TAG+" "+leader, desc+".", 34, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream());
            if (ftid!=null) StringTools.writeKeyValueLine(FEATURE_TAG, "/FTId="+ftid+".", 34, this.getLineWidth(), null, FEATURE_TAG, this.getPrintStream());
        }

        // sequence header: molecular weight + CRC64 checksum of the raw sequence string
        int mw = 0;
        try {
            mw = (int) MassCalc.getMolecularWeight(rs);
        } catch (IllegalSymbolException e) {
            throw new RuntimeException("Found illegal symbol", e);
        }
        CRC64Checksum crc = new CRC64Checksum();
        String seqstr = rs.seqString();
        crc.update(seqstr.getBytes(),0,seqstr.length());
        this.getPrintStream().print(START_SEQUENCE_TAG+" SEQUENCE "+StringTools.leftPad(""+rs.length(),4)+" AA; ");
        this.getPrintStream().print(StringTools.leftPad(""+mw,5)+" MW; ");
        this.getPrintStream().println(crc+" CRC64;");

        // sequence stuff: 60 residues per line, in space-separated groups of 10
        Symbol[] syms = (Symbol[])rs.toList().toArray(new Symbol[0]);
        int symCount = 0;
        this.getPrintStream().print(" ");
        for (int i = 0; i < syms.length; i++) {
            if (symCount % 60 == 0 && symCount>0) {
                this.getPrintStream().print("\n ");
            }
            if (symCount % 10 == 0) {
                this.getPrintStream().print(" ");
            }
            try {
                this.getPrintStream().print(tok.tokenizeSymbol(syms[i]));
            } catch (IllegalSymbolException e) {
                // NOTE(review): cause is dropped here (unlike the MW catch above) — consider
                // passing e as the cause.
                throw new RuntimeException("Found illegal symbol: "+syms[i]);
            }
            symCount++;
        }
        this.getPrintStream().print("\n");
        this.getPrintStream().println(END_SEQUENCE_TAG);
    }

    /**
     * {@inheritDoc}
     */
    public String getDefaultFormat() {
        return UNIPROT_FORMAT;
    }

    /**
     * Converts the current parse section to a String. Useful for debugging.
     */
    String sectionToString(List section){
        StringBuffer parseBlock = new StringBuffer();
        // Each element is a String[2] of {tag, value}; re-join them tag-first.
        for(Iterator i = section.listIterator(); i.hasNext();){
            String[] part = (String[])i.next();
            for(int x = 0; x < part.length; x++){
                parseBlock.append(part[x]);
                if(x == 0){
                    parseBlock.append(" "); //the gap will have been trimmed
                }
            }
        }
        return parseBlock.toString();
    }
}
diff --git a/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/ProcessNew.java b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/ProcessNew.java
new file mode 100644
index 0000000..9a080c9
--- /dev/null
+++ b/piflow-bundle/src/main/scala/cn/piflow/bundle/microorganism/util/ProcessNew.java
@@ -0,0 +1,571 @@
package cn.piflow.bundle.microorganism.util;



import org.biojava.bio.seq.Feature;
import org.biojavax.*;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.ontology.SimpleComparableTerm;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by xiujuan on 2016/3/24.
 */
public class ProcessNew {

    static final Logger logger = LoggerFactory.getLogger(ProcessNew.class);
    // Four-digit year anywhere in a collection-date string.
    static final Pattern dp = Pattern.compile("(\\d{4})");
    // "lat N|S lon W|E" latitude/longitude qualifier, e.g. "12.3 N 45.6 E".
    static final Pattern llp = Pattern.compile("(\\S+)\\s([SN])\\s(\\S+)\\s([WE])");
    static final Pattern submitDatep = Pattern.compile("^Submitted\\s+\\((\\S+)\\)\\s+(.*)$");
    // NOTE(review): SimpleDateFormat is not thread-safe; these shared static instances
    // are only safe if the processors are called single-threaded — confirm callers.
    static final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    static final SimpleDateFormat format = new SimpleDateFormat("dd-MMM-yyyy", Locale.ENGLISH);

    // static AddressCountryDict dict = AddressCountryDict.getInstance();

    /**
     * Flattens a (GenBank/RefSeq-style) RichSequence into a HashMap of scalar fields
     * plus a "features" JSONArray; feature qualifiers such as altitude, collection_date,
     * country, culture_collection and lat_lon get derived, normalised keys.
     *
     * @param seq the parsed sequence entry
     * @return map of field name to value, ready for downstream serialisation
     * @throws ParseException if a date note cannot be parsed with "dd-MMM-yyyy"
     */
    public static HashMap processSingleSequence(RichSequence seq) throws ParseException {
        //try{
        // logger.info("doc: " + seq.getAccession());

        HashMap map = new HashMap() ;


        map.put("Sequence", seq.seqString());
        map.put("Accession", seq.getAccession());

        map.put("SequenceLength", seq.getInternalSymbolList().length());
        if (seq.getTaxon() != null) {
            map.put("TaxonID", seq.getTaxon().getNCBITaxID());
            map.put("Organism", seq.getTaxon().getDisplayName());
        }
        map.put("Description", seq.getDescription().replace('\n', ' '));

        map.put("Division", seq.getDivision());
        map.put("Identifier", seq.getIdentifier());
        map.put("Version", seq.getVersion());

        if (seq.getCircular()) {
            map.put("Topology", "Circular");
        } else {
            map.put("Topology", "Linear");
        }

        // Notes: the term's toString() is prefixed (9 chars are stripped) before matching.
        for (Note note : seq.getNoteSet()) {
            String noteName = note.getTerm().toString().substring(9);
            if (noteName.indexOf("moltype") != -1) {
                map.put("MoleculeType", note.getValue());
            } else if (noteName.indexOf("Organism") != -1) {
                String organism = note.getValue();
                //doc.put("Organism", organism.substring(0,organism.indexOf("\n")));
                map.put("Lineage", organism.substring(organism.indexOf("\n")).replaceAll("\n", ""));
            } else if (noteName.indexOf("acc") != -1) {
                map.put("AdditionalAccs", note.getValue());
            } else if (noteName.indexOf("DBLink") != -1) { //deal with dblinks
                JSONArray dbLinks = new JSONArray();
                String[] val = note.getValue().split("\\n");
                for (String v : val) {
                    int index = v.indexOf(":");
                    if (index != -1) {
                        JSONObject link = new JSONObject();
                        link.put(v.substring(0, index), v.substring(index + 1).trim());
                        dbLinks.put(link);
                    } else { // value splitted into more than one line
                        JSONObject last = dbLinks.getJSONObject(dbLinks.length() - 1);
                        String key = last.keys().next();
                        String value = last.get(key).toString();
                        String newVal = value + v;
                        last.put(key, newVal);
                    }
                }
                map.put("dbLinks", dbLinks);
            } else if (noteName.equals("kw")) {
                map.put("KeyWords", note.getValue());
            } else if (noteName.equals("udat")) {
                map.put("dateUpdated", formatter.format(format.parse(note.getValue())));
            } else {
                map.put(noteName, note.getValue());
            }
        }

        //features
        JSONArray featureArray = new JSONArray();
        Iterator featureIterator = seq.features();
        List isolates = new ArrayList();
        while (featureIterator.hasNext()) {
            JSONObject featureObject = new JSONObject();
            List dbxrefArray = new ArrayList();
            RichFeature feature = (RichFeature) featureIterator.next();
            for (RankedCrossRef rankedCrossRef : feature.getRankedCrossRefs()) {
                dbxrefArray.add(rankedCrossRef.getCrossRef().getDbname() + ":" + rankedCrossRef.getCrossRef().getAccession());
            }
            featureObject.put("db_xref", dbxrefArray);

            featureObject.put("featureType", feature.getType());
            Map featureMap = feature.getAnnotation().asMap();
            Iterator<SimpleComparableTerm> featureKeyIterator = featureMap.keySet().iterator();
            while (featureKeyIterator.hasNext()) {
                SimpleComparableTerm term = featureKeyIterator.next();
                String name = term.getName();
                String nameValue = featureMap.get(term).toString();
                //isolate is an array?

                if (name.indexOf("altitude") != -1) {
                    featureObject.put("altitude_value", Float.valueOf(nameValue.substring(0, nameValue.indexOf(" ")))); //number, take care of negative number
                } else if (name.indexOf("collection_date") != -1) {
                    if (getCollectionYear(nameValue) != 0) {
                        featureObject.put("collection_year", getCollectionYear(nameValue));
                    }
                } else if (name.indexOf("country") != -1) {
                    if (nameValue.indexOf(":") != -1) {
                        featureObject.put("CollectionCountry", nameValue.substring(0, nameValue.indexOf(":")));
                    }
                } else if (name.indexOf("culture_collection") != -1) {
                    // "INST:id" preferred, falling back to "INST id".
                    int index = nameValue.indexOf(":") != -1 ? nameValue.indexOf(":") : nameValue.indexOf(" ");
                    if (index != -1) {
                        featureObject.put("InstitutionCode", nameValue.substring(0, index));
                        featureObject.put("CultureID", nameValue.substring(index + 1));
                    }
                } else if (name.indexOf("lat_lon") != -1) {
                    Float[] arr = getLat_Lon(nameValue);
                    if (arr != null) {
                        featureObject.put("Latitude", arr[0]);
                        featureObject.put("Longitude", arr[1]);
                    }
                } else if (name.indexOf("pathovar") != -1) {
                    // intentionally empty: pathovar gets no derived field, but is still
                    // copied through by the unconditional put below
                } else if (feature.getType().equals("source") && name.equals("isolate")) {
                    isolates.add(nameValue);
                }
                // Always copy the raw qualifier through as well.
                featureObject.put(term.getName(), featureMap.get(term));
            }
            featureArray.put(featureObject);
            //for garbage collection
            featureObject = null;
            dbxrefArray = null;
            feature = null;
            featureMap = null;
        }
        map.put("features", featureArray);
        if (isolates.size() > 0) {
            map.put("isolate_all", isolates);
        }
        return map;
    }

    /**
     * Extracts the first four-digit year found in a collection-date string.
     * @return the year, or 0 if none is present
     */
    public static int getCollectionYear(String date){
        Matcher m = dp.matcher(date);
        String year;
        if(m.find()){
            year = m.group(1);
            return Integer.parseInt(year);
        }else{
            return 0;
        }
    }

    /**
     * Parses a "lat N|S lon W|E" qualifier into signed degrees.
     * @return {latitude, longitude} with S/W negated, or null if the string does not
     *         match or a coordinate is not numeric
     */
    public static Float[] getLat_Lon(String lat_lon){
        Matcher m = llp.matcher(lat_lon);
        Float[] array = null;
        try{
            if(m.matches()){
                array = new Float[2];
                if(m.group(2).equals("N")){
                    array[0] = Float.valueOf(m.group(1));
                }else{
                    array[0] = Float.valueOf("0")-Float.valueOf(m.group(1));
                }
                if(m.group(4).equals("E")){
                    array[1] = Float.valueOf(m.group(3));
                }else{
                    array[1] = Float.valueOf("0")-Float.valueOf(m.group(3));
                }
            }
        }catch (NumberFormatException nfe){
            return null;
        }
        return array;
    }

    /**
     * Populates {@code doc} from a UniProt RichSequence: header fields, taxon names,
     * cross/doc references, comments, features, sequence and note-derived properties
     * (keywords, dates, organism hosts, secondary accessions, docref extras).
     *
     * @throws ParseException if a date note cannot be parsed with "dd-MMM-yyyy"
     */
    public static void processUniprotSeq(RichSequence seq, JSONObject doc) throws ParseException {
        logger.info("doc: " + seq.getAccession());
        doc.put("Accession", seq.getAccession());
        doc.put("Name", seq.getName());
        doc.put("Division", seq.getDivision());
        doc.put("Description", seq.getDescription().replace('\n', ' '));
        doc.put("Version", seq.getVersion());
        doc.put("sequencelength", seq.length());
        //Taxon
        doc.put("TaxonID", seq.getTaxon().getNCBITaxID());
        for(Object name: seq.getTaxon().getNameClasses()){
            doc.put("Taxon_"+(String)name, seq.getTaxon().getNames((String)name));
        }

        //rankedcrossrefs
        /*JSONArray rankedCrossRefs = new JSONArray();
        for(RankedCrossRef rankedCrossRef : seq.getRankedCrossRefs()){
            JSONObject ref = new JSONObject();
            String key = rankedCrossRef.getCrossRef().getDbname();
            String accessions = rankedCrossRef.getCrossRef().getAccession();
            for(Note note : rankedCrossRef.getCrossRef().getRichAnnotation().getNoteSet()){
                accessions += ";"+note.getValue();
            }
            ref.put(key, accessions);
            rankedCrossRefs.put(ref);
        }
        if(rankedCrossRefs.length() > 0){
            doc.put("rankedCrossRefs", rankedCrossRefs);
        }*/
        processRankedCrossRefs(seq, doc);
        //comments: each "-!- TOPIC: text" becomes a {TOPIC: text} object
        JSONArray comments = new JSONArray();
        for(Comment comment : seq.getComments()){
            JSONObject cmtObj = new JSONObject();
            String cmt = comment.getComment().replace('\n', ' ');
            cmt = cmt.substring(3);
            int index = cmt.indexOf(":");
            cmtObj.put(cmt.substring(0,index).trim(),cmt.substring(index+1).trim());
            comments.put(cmtObj);
        }
        if(comments.length() > 0){
            doc.put("comments", comments);
        }
        //features
        JSONArray features = new JSONArray();
        Iterator featureIterator = seq.features();
        while(featureIterator.hasNext()){
            JSONObject featureObject = new JSONObject();
            List dbxrefArray = new ArrayList();
            RichFeature feature = (RichFeature)featureIterator.next();
            for(RankedCrossRef rankedCrossRef : feature.getRankedCrossRefs()){
                dbxrefArray.add(rankedCrossRef.getCrossRef().getDbname() + ":" + rankedCrossRef.getCrossRef().getAccession());
            }
            if(dbxrefArray.size() > 0){
                featureObject.put("rankedCrossRefs", dbxrefArray);
            }
            featureObject.put("type", feature.getType());
            featureObject.put("location_start", feature.getLocation().getMin());
            featureObject.put("location_end", feature.getLocation().getMax());
            Map featureMap = feature.getAnnotation().asMap();
            Iterator<SimpleComparableTerm> featureKeyIterator = featureMap.keySet().iterator();
            while(featureKeyIterator.hasNext()){
                SimpleComparableTerm term = featureKeyIterator.next();
                featureObject.put(term.getName(),featureMap.get(term));
            }
            features.put(featureObject);
        }
        if(features.length() > 0){
            doc.put("features", features);
        }
        //sequence
        doc.put("sequence", seq.seqString());

        JSONArray rankedDocRefs = new JSONArray();
        // rank -> extra "key:value" strings harvested from docref_* notes below.
        // NOTE(review): generic parameters reconstructed from a mangled patch — confirm.
        Map<Integer, List<String>> rankedDocRefs_addiInfo = new HashMap<Integer, List<String>>();
        //properties from notes: rlistener.addSequenceProperty
        List keywords = new ArrayList();
        List secondaryAccs = new ArrayList();
        JSONArray organismHosts = new JSONArray();
        for(Note note : seq.getNoteSet()){
            String note_term = note.getTerm().getName();
            if(note_term.equals("kw")){
                keywords.add(note.getValue());
            }else if(note_term.equals("cdat")){
                doc.put("dateCreated", formatter.format(format.parse(note.getValue())));
            }else if(note_term.equals("udat")){
                doc.put("dateUpdated", formatter.format(format.parse(note.getValue())));
            }else if(note_term.equals("adat")){
                doc.put("dateAnnotated", formatter.format(format.parse(note.getValue())));
            }else if(note_term.equals("arel")){
                doc.put("relAnnotated", note.getValue());
            }else if(note_term.equals("Organism host")){
                // "NCBI_TaxID=nnn; Scientific name (common name) (synonym)..." — split the
                // taxid part from the name part, then peel bracketed names.
                JSONObject organismHost = new JSONObject();
                String sciname;
                String comname;
                String names = null;
                List synonym = new ArrayList();
                String[] parts = note.getValue().split(";");
                if(parts[0].matches("\\S+=\\S+")){
                    String[] moreparts = parts[0].split("=");
                    if(moreparts[0].equals("NCBI_TaxID")){
                        organismHost.put("NCBI_TaxID",Integer.parseInt(moreparts[1]));
                    }else{
                        organismHost.put(moreparts[0],moreparts[1]);
                    }
                }else{
                    names = parts[0];
                }
                if(parts.length > 1){
                    names = parts[1];
                }
                if(names != null){
                    if (names.endsWith(".")) names = names.substring(0,names.length()-1); // chomp trailing dot
                    String[] nameparts = names.split("\\(");
                    sciname = nameparts[0].trim();
                    organismHost.put("scientific name", sciname);
                    if (nameparts.length>1) {
                        comname = nameparts[1].trim();
                        if (comname.endsWith(")")) comname = comname.substring(0,comname.length()-1); // chomp trailing bracket
                        organismHost.put("common name", comname);
                        if (nameparts.length>2) {
                            // synonyms
                            for (int j = 2 ; j < nameparts.length; j++) {
                                String syn = nameparts[j].trim();
                                if (syn.endsWith(")")) syn = syn.substring(0,syn.length()-1); // chomp trailing bracket
                                synonym.add(syn);
                            }
                            organismHost.put("synonym", synonym);
                        }
                    }
                }
                organismHosts.put(organismHost);
            }else if(note_term.equals("Sequence meta info")){
                // "SEQUENCE nnn AA; mmm MW; xxx CRC64;" — index 1 is the weight, index 2 the checksum.
                String seqMetaInfo = note.getValue();
                if(seqMetaInfo.startsWith("SEQUENCE")){
                    seqMetaInfo = seqMetaInfo.substring(8);
                }
                String[] parts = seqMetaInfo.split(";");
                if(parts.length > 1){
                    doc.put("molecular weight", Integer.parseInt(parts[1].trim().split(" ")[0]));
                    if(parts.length > 2){
                        String[] moreparts = parts[2].trim().split(" ");
                        doc.put(moreparts[1], moreparts[0]);
                    }
                }
            }else if(note_term.startsWith("docref")){
                // Note value is "rank: value"; note term is "docref_<key>".
                int rank = Integer.parseInt(note.getValue().split(":")[0].trim());
                String key = note_term.substring(7); //remove the precedding "docref_"
                if(key.contains("biojavax:")){
                    key = key.substring(9); //remove "biojavax:"
                }
                String value = note.getValue().substring(note.getValue().indexOf(":")+1).trim();
                if(rankedDocRefs_addiInfo.containsKey(rank)){
                    rankedDocRefs_addiInfo.get(rank).add(key+":"+value);
                }else{
                    List tmp = new ArrayList();
                    tmp.add( key+":"+value);
                    rankedDocRefs_addiInfo.put(rank,tmp);
                }
            }else if(note_term.equals("acc")){
                secondaryAccs.add(note.getValue());
            }else{
                doc.put(note_term, note.getValue());
            }
        }
        if(secondaryAccs.size() > 0){
            doc.put("secondaryacc",secondaryAccs);
        }
        if(organismHosts.length() > 0){
            doc.put("organismhost", organismHosts);
        }
        if(keywords.size() > 0){
            doc.put("keywords", keywords);
        }

        //rankeddocref: merge the docref_* extras (by rank) into each reference object
        for(RankedDocRef rankedDocRef : seq.getRankedDocRefs()){
            JSONObject rankedDocRefObj = new JSONObject();
            DocRef docRef = rankedDocRef.getDocumentReference();
            rankedDocRefObj.put("rank", rankedDocRef.getRank());
            rankedDocRefObj.put("authors", docRef.getAuthors());
            rankedDocRefObj.put("title", docRef.getTitle());
            rankedDocRefObj.put("location", docRef.getLocation());
            rankedDocRefObj.put("remark", docRef.getRemark());
            for(Map.Entry entry : rankedDocRefs_addiInfo.entrySet()){
                if((Integer)(entry.getKey()) == rankedDocRef.getRank()){
                    for(String pair : (List<String>)(entry.getValue())){
                        int index = pair.indexOf(":");
                        rankedDocRefObj.put(pair.substring(0, index),pair.substring(index+1));
                    }
                }
            }
            rankedDocRefs.put(rankedDocRefObj);
        }
        if(rankedDocRefs.length() > 0){
            doc.put("rankedDocRefs", rankedDocRefs);
        }
    }

    /**
     * Populates {@code doc} from an EMBL/Ensembl RichSequence: topology, note-derived
     * header fields, taxon, cross references, comments and features (with altitude and
     * collection-date normalisation).
     *
     * @throws ParseException if a date note cannot be parsed with "dd-MMM-yyyy"
     */
    public static void processEMBL_EnsemblSeq(RichSequence seq,JSONObject doc) throws ParseException {
        logger.info("accession: " + seq.getName());
        if(seq.getCircular()){
            doc.put("Topology", "Circular");
        }else{
            doc.put("Topology", "Linear");
        }
        for(Note note : seq.getNoteSet()){
            String noteName = note.getTerm().toString().substring(9);
            if(noteName.equals("moltype")){
                doc.put("Molecule type", note.getValue());
            }else if(noteName.equals("organism")){
                doc.put("Classfication", note.getValue().replaceAll("\n", ""));
            }else if(noteName.equals("kw")){
                doc.put("KeyWords", note.getValue());
            }else if(noteName.equals("udat")){
                doc.put("dateUpdated", formatter.format(format.parse(note.getValue())));
            }else if(noteName.equals("cdat")){
                doc.put("dateCreated", formatter.format(format.parse(note.getValue())));
            }else{
                doc.put(noteName, note.getValue());
            }
        }
        doc.put("SequenceLength", seq.getInternalSymbolList().length());
        doc.put("Description", seq.getDescription().replace('\n', ' '));
        //System.out.println(seq.getInternalSymbolList().length());
        //doc.put("Sequence length", seq.getInternalSymbolList().length());
        doc.put("Accession", seq.getName());
        doc.put("Organism",seq.getTaxon().getDisplayName());
        doc.put("TaxonID", seq.getTaxon().getNCBITaxID());

        /*for (RankedDocRef rankDocRef : seq.getRankedDocRefs()){
            if(rankDocRef.getDocumentReference().getLocation().indexOf("Submitted") != -1){
                int dotindex = rankDocRef.getDocumentReference().getLocation().indexOf(".");
                String submitDate = rankDocRef.getDocumentReference().getLocation().substring(11,22);
                String submitAddress = rankDocRef.getDocumentReference().getLocation().substring(dotindex+1).trim();
                doc.put("SubmitDate", format.parse(submitDate));
                doc.put("SubmittedAddress", rankDocRef.getDocumentReference().getLocation().substring(dotindex+1).trim());
            }
        }*/
        //rankedDocRefs
        //processRankedDocRefs(seq, doc);

        //rankedCrossRef
        processRankedCrossRefs(seq, doc);

        //comments
        processComment(seq, doc);

        //features
        JSONArray featureArray = new JSONArray();
        Iterator featureIterator = seq.features();
        while (featureIterator.hasNext()){
            JSONObject featureObject = new JSONObject();
            List dbxrefArray = new ArrayList();
            RichFeature feature = (RichFeature)featureIterator.next();
            //deal with db_xref in each feature
            //db_xref is not required in the requirement
            for(RankedCrossRef rankedCrossRef : feature.getRankedCrossRefs()){
                dbxrefArray.add(rankedCrossRef.getCrossRef().getDbname() + ":" + rankedCrossRef.getCrossRef().getAccession());
            }
            featureObject.put("db_xref", dbxrefArray);

            featureObject.put("featureType", feature.getType());
            Map featureMap = feature.getAnnotation().asMap();
            Iterator<SimpleComparableTerm> featureKeyIterator = featureMap.keySet().iterator();
            while(featureKeyIterator.hasNext()){
                SimpleComparableTerm term = featureKeyIterator.next();
                String name = term.getName();
                String nameValue = featureMap.get(term).toString();

                if(name.equals("altitude")){
                    featureObject.put("altitude_value", Float.valueOf(nameValue.substring(0,nameValue.indexOf("m")).trim())); //number, take care of negative number
                }else if(name.equals("collection_date")){
                    // Possibly a "date1/date2" range; normalise dd-MMM-yyyy values and
                    // always record the extracted year.
                    JSONArray collectionDates = new JSONArray();
                    for(String singleDate : nameValue.split("/")){
                        JSONObject collectionDate = new JSONObject();
                        if(singleDate.endsWith("FT")){
                            singleDate = singleDate.substring(0, singleDate.length()-2);
                        }
                        if(singleDate.matches("\\d{2}-\\w{3}-\\d{4}")){
                            collectionDate.put("collection_date", formatter.format(format.parse(singleDate)));
                        }else{
                            collectionDate.put("collection_date", singleDate);
                        }

                        collectionDate.put("collection_year", getCollectionYear(singleDate));
                        collectionDates.put(collectionDate);
                    }
                    featureObject.put("collectionDate", collectionDates);
                }
                // Always copy the raw qualifier through as well.
                featureObject.put(term.getName(),featureMap.get(term));
            }
            featureArray.put(featureObject);
        }
        doc.put("features", featureArray);
    }

    // Method continues beyond this chunk of the patch; body reproduced as-is up to the cut.
    public static void processRankedCrossRefs(RichSequence seq, JSONObject doc){
        JSONArray rankedCrossRefs = new JSONArray();
        for(RankedCrossRef rankedCrossRef : seq.getRankedCrossRefs()){
            JSONObject ref = new JSONObject();
            String key = rankedCrossRef.getCrossRef().getDbname();
            String accessions = rankedCrossRef.getCrossRef().getAccession();
            for(Note note : rankedCrossRef.getCrossRef().getRichAnnotation().getNoteSet()){
                accessions +=
";"+note.getValue(); + } + ref.put(key, accessions); + rankedCrossRefs.put(ref); + } + if(rankedCrossRefs.length() > 0){ + doc.put("rankedCrossRefs", rankedCrossRefs); + } + } + +// public static void processRankedDocRefs(RichSequence seq, JSONObject doc) throws ParseException { +// JSONArray rankedDocRefs = new JSONArray(); +// for(RankedDocRef rankedDocRef : seq.getRankedDocRefs()){ +// DocRef docRef = rankedDocRef.getDocumentReference(); +// JSONObject rankedRef = new JSONObject(); +// rankedRef.put("authors", docRef.getAuthors()); +// rankedRef.put("title", docRef.getTitle()); +// if(docRef.getCrossref() != null){ +// String dbName = docRef.getCrossref().getDbname(); +// if(dbName.equals("PUBMED")){ +// rankedRef.put(dbName, Integer.parseInt(docRef.getCrossref().getAccession())); +// }else{ +// rankedRef.put(dbName, docRef.getCrossref().getAccession()); +// } +// } +// Matcher m = submitDatep.matcher(docRef.getLocation().replaceAll("\n", " ")); +// if(m.matches()){ +// rankedRef.put("SubmitDate", formatter.format(format.parse(m.group(1)))); +// rankedRef.put("SubmitAddress", m.group(2)); +// int year = Integer.parseInt(m.group(1).substring(m.group(1).lastIndexOf("-")+1)); +// rankedRef.put("SubmitYear", year); +// //submitCountry--extract from SubmitAddress +// String countryName = dict.mappingCountry(m.group(2)); +// if(countryName != null){ +// rankedRef.put("SubmitCountry", countryName); +// } +// } +// rankedDocRefs.put(rankedRef); +// } +// doc.put("rankedDocRefs", rankedDocRefs); +// } + + public static void processComment(RichSequence seq, JSONObject doc){ + Map commentMetaData = new HashMap(); + JSONArray comments = new JSONArray(); + for(Comment comment: seq.getComments()){ + JSONObject commentObj = new JSONObject(); + if(comment.getComment().indexOf("::") != -1){ + String comm[] = comment.getComment().split("\n"); + for(int i = 0; i < comm.length; i++){ + if(comm[i].matches("(.*)\\s+::\\s+(.*)")){ + String[] metaData = comm[i].split("::"); + String 
key = metaData[0].trim(); + String value = metaData[1].trim(); + if(key.contains(".")){ + key = key.replaceAll("\\.", " "); + } + commentMetaData.put(key, value); + } + } + commentObj.put("commentMeta", commentMetaData); + }else{ + commentObj.put("comment", comment.getComment()); + } + comments.put(commentObj); + } + doc.put("comments", comments); + } +} diff --git a/piflow-bundle/src/test/scala/cn/piflow/bundle/ftp/emblTest.scala b/piflow-bundle/src/test/scala/cn/piflow/bundle/ftp/emblTest.scala new file mode 100644 index 0000000..e4d1626 --- /dev/null +++ b/piflow-bundle/src/test/scala/cn/piflow/bundle/ftp/emblTest.scala @@ -0,0 +1,87 @@ +package cn.piflow.bundle.ftp + +import cn.piflow.Runner +import cn.piflow.conf.bean.FlowBean +import cn.piflow.conf.util.{FileUtil, OptionUtil} +import org.apache.spark.sql.SparkSession +import org.h2.tools.Server +import org.jsoup.Jsoup +import org.jsoup.select.Elements +import org.junit.Test + +import scala.util.parsing.json.JSON + +class emblTest { + + @Test + def testEmblDataParse(): Unit ={ + + //parse flow json +// val file = "src/main/resources/yqd/down.json" +//val file = "src/main/resources/yqd/refseq_genome.json" +//val file = "src/main/resources/yqd/select_unzip.json" +val file = "src/main/resources/yqd/embl_parser.json" + + val flowJsonStr = FileUtil.fileReader(file) + + val map = OptionUtil.getAny(JSON.parseFull(flowJsonStr)).asInstanceOf[Map[String, Any]] + println(map) + + //create flow + val flowBean = FlowBean(map) + val flow = flowBean.constructFlow() + + val h2Server = Server.createTcpServer("-tcp", "-tcpAllowOthers", "-tcpPort","50001").start() + //execute flow + val spark = SparkSession.builder() + .master("spark://10.0.88.70:7077") + .appName("Embl") + .config("spark.driver.memory", "8g") + .config("spark.executor.memory", "16g") + .config("spark.cores.max", "16") + .config("spark.jars","/root/Desktop/weishengwu/out/artifacts/piflow_bundle/piflow_bundle.jar") + .enableHiveSupport() + .getOrCreate() + + val 
process = Runner.create() + .bind(classOf[SparkSession].getName, spark) + .bind("checkpoint.path", "hdfs://10.0.86.89:9000/xjzhu/piflow/checkpoints/") + .start(flow); + + process.awaitTermination(); + val pid = process.pid(); + println(pid + "!!!!!!!!!!!!!!!!!!!!!") + spark.close(); + } + + + @Test + def testEmblDataParse11(): Unit ={ + + val url ="http://ftp.ebi.ac.uk/pub/databases/ena/sequence/release/" + val doc = Jsoup.connect(url).timeout(100000000).get() + // fetch the directory-listing page: file name, last-modified date, size + // Name Last modified Size Parent Directory - + // build_gbff_cu.pl 2003-04-25 17:23 21K + + val elements: Elements = doc.select("html >body >table >tbody") +// println(elements) + println(elements.first().text()) + + // split the elements text into one string per line + val fileString = elements.first().text().split("\\n") + + + for (i <- 0 until fileString.size) { + + println(fileString(i)) + } + + println(fileString) + } + + + + + +} From dc652eb798fdd9b972025d1310f015a3321e9d07 Mon Sep 17 00:00:00 2001 From: judy0131 Date: Mon, 24 Dec 2018 11:23:53 +0800 Subject: [PATCH 2/3] Create doc --- doc | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc diff --git a/doc b/doc new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/doc @@ -0,0 +1 @@ + From 01e6a5c3f445508439f367bfe5a334c43524e065 Mon Sep 17 00:00:00 2001 From: judy0131 Date: Mon, 24 Dec 2018 11:24:13 +0800 Subject: [PATCH 3/3] Delete doc --- doc | 1 - 1 file changed, 1 deletion(-) delete mode 100644 doc diff --git a/doc b/doc deleted file mode 100644 index 8b13789..0000000 --- a/doc +++ /dev/null @@ -1 +0,0 @@ -