Compare commits

...

4 Commits

Author SHA1 Message Date
yanggang 6f25453768 change group 2019-04-09 15:34:13 +08:00
yanggang 676114c6b0 change group 2019-03-29 14:21:14 +08:00
yanggang 791aecfb26 change group 2019-03-29 14:13:41 +08:00
yanggang ba50669c27 updata 2019-03-28 17:50:39 +08:00
31 changed files with 379 additions and 31 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -0,0 +1,125 @@
package cn.piflow.bundle.asr
import java.io.{File, FileNotFoundException}
import cn.piflow._
import cn.piflow.conf._
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.http.entity.ContentType
import org.apache.http.util.EntityUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.json.JSONObject
import scala.collection.mutable.ArrayBuffer
class ASR extends ConfigurableStop {

  val authorEmail: String = "huchuan0901@163.com"
  val description: String = "Speech recognition"
  val inportList: List[String] = List(PortEnum.AnyPort.toString)
  val outportList: List[String] = List(PortEnum.DefaultPort.toString)

  // Path of a text file that lists one audio-file path per line.
  var audioPath: String = _
  // URL of the speech-recognition HTTP service
  // (e.g. "http://10.0.86.128:8081/service/classify/asr/").
  var url: String = _

  /**
    * Posts every audio file listed in `audioPath` to the ASR service and
    * writes a DataFrame(audiopath, error, value, res) with one row per
    * request that returned HTTP 200. Missing files and non-200 responses
    * are skipped (best effort), matching the original commented-out intent.
    */
  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val session: SparkSession = pec.get[SparkSession]()

    // Collect the path list to the driver; it is expected to be small.
    val paths: Array[String] = session.read.textFile(audioPath).rdd.collect()

    import org.apache.http.client.config.RequestConfig
    import org.apache.http.client.methods.HttpPost
    import org.apache.http.entity.mime.MultipartEntityBuilder
    import org.apache.http.impl.client.HttpClientBuilder

    val httpClient = HttpClientBuilder.create.build
    // Generous timeouts: the remote recognition call can be slow.
    val requestConfig =
      RequestConfig.custom.setConnectTimeout(200000).setSocketTimeout(200000000).build

    val rows = new ArrayBuffer[Row]
    try {
      paths.foreach { path =>
        val file = new File(path)
        if (!file.exists()) {
          // Skip missing files instead of failing the whole job.
          println("file not found: " + path)
        } else {
          val httpPost = new HttpPost(url)
          httpPost.setConfig(requestConfig)
          val multipartEntityBuilder = MultipartEntityBuilder.create
          multipartEntityBuilder.addBinaryBody(
            "audio1", file, ContentType.create("audio/wav"), "audio1")
          httpPost.setEntity(multipartEntityBuilder.build)

          val httpResponse = httpClient.execute(httpPost)
          try {
            if (httpResponse.getStatusLine.getStatusCode == 200) {
              val result = new JSONObject(EntityUtils.toString(httpResponse.getEntity))
              rows += Row.fromSeq(Array(
                path,
                result.getString("error"),
                result.getString("value"),
                result.getBoolean("res").toString))
            }
          } finally {
            // Always release the connection, even when parsing fails.
            httpResponse.close()
          }
        }
      }
    } finally {
      // Close the client even if a request throws mid-loop (was leaked before).
      httpClient.close()
    }

    val rowRDD: RDD[Row] = session.sparkContext.makeRDD(rows)
    val schema: StructType = StructType(Array(
      StructField("audiopath", StringType),
      StructField("error", StringType),
      StructField("value", StringType),
      StructField("res", StringType)
    ))
    val df: DataFrame = session.createDataFrame(rowRDD, schema)
    out.write(df)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    audioPath = MapUtil.get(map, "audioPath").asInstanceOf[String]
    url = MapUtil.get(map, "url").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val audioPath = new PropertyDescriptor().name("audioPath").displayName("audioPath")
      .description("The path of audio file").defaultValue("").required(true)
    descriptor = audioPath :: descriptor
    val url = new PropertyDescriptor().name("url").displayName("url")
      .description("The url of the API")
      .defaultValue("http://10.0.86.128:8081/service/classify/asr/").required(false)
    descriptor = url :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/asr/ASR.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.Alg_ASRGroup.toString)
  }
}

View File

@ -68,11 +68,6 @@ class SelectFilesByName extends ConfigurableStop{
val schema: StructType = StructType(fields)
val df: DataFrame = session.createDataFrame(rowRDD,schema)
println("#################################################")
df.show(20)
println(df.count+"#################################################")
out.write(df)
}

View File

@ -0,0 +1,123 @@
package cn.piflow.bundle.imageProcess
import java.io.{File, FileNotFoundException}
import cn.piflow._
import cn.piflow.conf._
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.http.entity.ContentType
import org.apache.http.util.EntityUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.json.JSONObject
import scala.collection.mutable.ArrayBuffer
class ImageProcess extends ConfigurableStop {

  val authorEmail: String = "huchuan0901@163.com"
  val description: String = "Image classification"
  val inportList: List[String] = List(PortEnum.DefaultPort.toString)
  val outportList: List[String] = List(PortEnum.DefaultPort.toString)

  // Path of a text file that lists one image-file path per line.
  var imagePath: String = _
  // URL of the image-classification HTTP service
  // (e.g. "http://10.0.86.128:8081/service/classify/dogorcat/").
  var url: String = _

  /**
    * Posts every image listed in `imagePath` to the classification service
    * and writes a DataFrame(imagepath, error, value, res) with one row per
    * request that returned HTTP 200. Missing files and non-200 responses
    * are skipped (best effort), matching the original commented-out intent.
    */
  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val session: SparkSession = pec.get[SparkSession]()

    // Collect the path list to the driver; it is expected to be small.
    val paths: Array[String] = session.read.textFile(imagePath).rdd.collect()

    import org.apache.http.client.config.RequestConfig
    import org.apache.http.client.methods.HttpPost
    import org.apache.http.entity.mime.MultipartEntityBuilder
    import org.apache.http.impl.client.HttpClientBuilder

    val httpClient = HttpClientBuilder.create.build
    // Generous timeouts: the remote classification call can be slow.
    val requestConfig =
      RequestConfig.custom.setConnectTimeout(200000).setSocketTimeout(200000000).build

    val rows = new ArrayBuffer[Row]
    try {
      paths.foreach { path =>
        val file = new File(path)
        if (!file.exists()) {
          // Skip missing files instead of failing the whole job.
          println("file not found: " + path)
        } else {
          val httpPost = new HttpPost(url)
          httpPost.setConfig(requestConfig)
          val multipartEntityBuilder = MultipartEntityBuilder.create
          multipartEntityBuilder.addBinaryBody(
            "image1", file, ContentType.create("image/png"), "image1")
          httpPost.setEntity(multipartEntityBuilder.build)

          val httpResponse = httpClient.execute(httpPost)
          try {
            if (httpResponse.getStatusLine.getStatusCode == 200) {
              val result = new JSONObject(EntityUtils.toString(httpResponse.getEntity))
              rows += Row.fromSeq(Array(
                path,
                result.getString("error"),
                result.getString("value"),
                result.getBoolean("res").toString))
            }
          } finally {
            // Always release the connection, even when parsing fails.
            httpResponse.close()
          }
        }
      }
    } finally {
      // Close the client even if a request throws mid-loop (was leaked before).
      httpClient.close()
    }

    val rowRDD: RDD[Row] = session.sparkContext.makeRDD(rows)
    val schema: StructType = StructType(Array(
      StructField("imagepath", StringType),
      StructField("error", StringType),
      StructField("value", StringType),
      StructField("res", StringType)
    ))
    val df: DataFrame = session.createDataFrame(rowRDD, schema)
    out.write(df)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    imagePath = MapUtil.get(map, "imagePath").asInstanceOf[String]
    url = MapUtil.get(map, "url").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val imagePath = new PropertyDescriptor().name("imagePath").displayName("imagePath")
      .description("The path of image file").defaultValue("").required(true)
    descriptor = imagePath :: descriptor
    val url = new PropertyDescriptor().name("url").displayName("url")
      .description("The url of the API")
      .defaultValue("http://10.0.86.128:8081/service/classify/dogorcat/").required(false)
    descriptor = url :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/imageProcess/imageProcess.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.Alg_ImageProcessGroup.toString)
  }
}

View File

@ -93,7 +93,7 @@ class EmblData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_Sequence)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -36,7 +36,7 @@ class Ensembl extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_Sequence)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -93,7 +93,7 @@ class GenBankData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup.toString)
List(StopGroup.Algorithms_Sequence.toString)
}
def initialize(ctx: ProcessContext): Unit = {

View File

@ -110,7 +110,7 @@ class Gene extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup.toString)
List(StopGroup.Algorithms_OntologyAnnotations.toString)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -120,7 +120,7 @@ class GoData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup.toString)
List(StopGroup.Algorithms_OntologyAnnotations.toString)
}
def initialize(ctx: ProcessContext): Unit = {

View File

@ -111,7 +111,7 @@ class InterproData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup.toString)
List(StopGroup.Algorithms_FunctionalDomain.toString)
}
def initialize(ctx: ProcessContext): Unit = {

View File

@ -211,7 +211,7 @@ class MedlineData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_Literature)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -99,7 +99,7 @@ class MicrobeGenomeData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_Genome)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -82,7 +82,7 @@ class PDBData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_FunctionalDomain)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -37,7 +37,7 @@ class Pathway extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_OntologyAnnotations)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -93,7 +93,7 @@ class PfamData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_FunctionalDomain)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -97,7 +97,7 @@ class RefseqData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_OntologyAnnotations)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -37,7 +37,7 @@ class SwissprotData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup)
List(StopGroup.Algorithms_Sequence)
}
override def initialize(ctx: ProcessContext): Unit = {

View File

@ -337,7 +337,7 @@ class TaxonomyData extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MicroorganismGroup.toString)
List(StopGroup.Algorithms_OntologyAnnotations.toString)
}
def initialize(ctx: ProcessContext): Unit = {

View File

@ -55,7 +55,7 @@ class BisectingKMeansPrediction extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -78,7 +78,7 @@ class BisectingKMeansTraining extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -55,7 +55,7 @@ class GaussianMixturePrediction extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -93,7 +93,7 @@ class GaussianMixtureTraining extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -55,7 +55,7 @@ class KmeansPrediction extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -89,7 +89,7 @@ class KmeansTraining extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -55,7 +55,7 @@ class LDAPrediction extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -125,7 +125,7 @@ class LDATraining extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLCluster.toString)
}
}

View File

@ -128,6 +128,6 @@ class WordToVec extends ConfigurableStop{
}
override def getGroup(): List[String] = {
List(StopGroup.MLGroup.toString)
List(StopGroup.Alg_MLFeature.toString)
}
}

View File

@ -0,0 +1,92 @@
package cn.piflow.bundle.nlp
import cn.piflow._
import cn.piflow.conf._
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode
import com.huaban.analysis.jieba._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
class NLP extends ConfigurableStop {

  val authorEmail: String = "huchuan0901@163.com"
  val description: String = "Word segmentation"
  val inportList: List[String] = List(PortEnum.AnyPort.toString)
  val outportList: List[String] = List(PortEnum.DefaultPort.toString)

  // Path of the text file to segment.
  var path: String = _

  val jiebaSegmenter = new JiebaSegmenter()

  // Tokens produced by the current run. Cleared at the start of perform()
  // so repeated executions of this stop do not accumulate tokens from
  // earlier runs (previously this buffer was never reset).
  var tokenARR: ArrayBuffer[String] = ArrayBuffer()

  /**
    * Strips punctuation/whitespace from `str`, segments it with jieba in
    * SEARCH mode, and appends the resulting words to `tokenARR`.
    */
  def segmenter(str: String): Unit = {
    // Delete punctuation and whitespace before segmenting.
    val cleaned = str.replaceAll("[\\p{P}+~$`^=|<>~`$^+=|<>¥×+\\s]", "")
    val tokens = jiebaSegmenter.process(cleaned, SegMode.SEARCH).asScala
    for (token: SegToken <- tokens) {
      tokenARR += token.word
    }
  }

  /**
    * Reads the text file, segments it, and writes a single-column
    * DataFrame("words") with one row per token.
    */
  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val session: SparkSession = pec.get[SparkSession]()

    // Read the input text.
    val strDF = session.read.text(path)

    // Reset state so this run's output contains only this run's tokens.
    tokenARR.clear()
    // NOTE(review): only the first row of the file is segmented —
    // confirm whether the whole file was intended.
    segmenter(strDF.head().getString(0))

    // One Row per token.
    val rows: List[Row] = tokenARR.map(word => Row.fromSeq(Array(word))).toList
    val rowRDD: RDD[Row] = session.sparkContext.makeRDD(rows)
    val schema: StructType = StructType(Array(
      StructField("words", StringType)
    ))
    val df: DataFrame = session.createDataFrame(rowRDD, schema)
    out.write(df)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    path = MapUtil.get(map, "path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val path = new PropertyDescriptor().name("path").displayName("path")
      .description("The path of text file").defaultValue("").required(true)
    descriptor = path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/nlp/NLP.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.Alg_NLPGroup.toString)
  }
}

View File

@ -17,8 +17,6 @@ object StopGroup {
val RedisGroup = "Redis"
val SolrGroup = "Solr"
val ESGroup = "ElasticSearch"
val MLGroup = "Machine Learning"
val RDFGroup = "RDF"
val HdfsGroup = "Hdfs"
val MicroorganismGroup = "BioInformatics"
val Spider = "Spider"
@ -28,5 +26,20 @@ object StopGroup {
val ExcelGroup = "Excel"
val HbaseGroup = "Hbase"
val StreamingGroup = "Streaming"
val Neo4jGroup = "Neo4j"
val Alg_NLPGroup = "Algorithms_NLP"
val Alg_ImageProcessGroup = "Algorithms_ImageProcess"
val Alg_ASRGroup = "Algorithms_ASR"
val Neo4jGroup = "Algorithms_Neo4j"
val RDFGroup = "Algorithms_RDF"
val Algorithms_FunctionalDomain="Algorithms_FunctionalDomain"
val Algorithms_Genome="Algorithms_Genome"
val Algorithms_OntologyAnnotations="Algorithms_OntologyAnnotations"
val Algorithms_Sequence="Algorithms_Sequence"
val Algorithms_Literature="Algorithms_Literature"
val Alg_MLCluster = "Algorithms_MachineLearningCluster"
val MLGroup = "Algorithms_MachineLearningClassification"
val Alg_MLFeature = "Algorithms_MachineLearningFeature"
}