forked from opensci/piflow
8.10
parent 1b898fd827
commit 65d73aa9cb
@@ -0,0 +1,88 @@
package cn.piflow.bundle.http

import java.io._
import java.net.{HttpURLConnection, URL}

import cn.piflow.conf.{ConfigurableStop, HttpGroup, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.MapUtil
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.spark.sql.SparkSession

class LoadZipFromUrl extends ConfigurableStop {

  val inportCount: Int = 0
  val outportCount: Int = 1

  var url_str: String = _

  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    // Open an HTTP connection to the configured URL.
    val url = new URL(url_str)
    val uc: HttpURLConnection = url.openConnection().asInstanceOf[HttpURLConnection]
    uc.setDoInput(true)
    uc.connect()

    val inputStream: InputStream = uc.getInputStream()
    val size = uc.getContentLength

    /* Alternative kept from development: save the download under a local
       directory instead of buffering it in memory.
    val savePath = new File("/xx/dblp/")
    if (!savePath.exists()) {
      savePath.mkdir()
    }
    println("*********savePath run****************")
    */

    // Use the last path segment of the URL as the file name.
    val urlname = url_str.split("/")
    val len = urlname.length
    val filename = urlname(len - 1)

    /*
    val file_path = savePath + "/" + filename
    val file = new File(file_path)
    if (!file.exists()) {
      file.createNewFile()
    }
    println("*********file create************")

    val outputStream = new FileOutputStream(file)
    */

    println(size)

    // Buffer the whole response body in memory, 1 MB at a time.
    val byteArrayOutputStream = new ByteArrayOutputStream()
    val buffer = new Array[Byte](1024 * 1024)

    val spark = pec.get[SparkSession]()
    import spark.sqlContext.implicits._

    // In Scala an assignment evaluates to Unit, so the Java-style
    // `while ((byteRead = inputStream.read(buffer)) != -1)` never sees -1;
    // read once before the loop and again at the end of each pass instead.
    var count = 0
    var byteRead = inputStream.read(buffer)
    while (byteRead != -1) {
      count = count + 1
      println(count + ":" + byteRead)
      byteArrayOutputStream.write(buffer, 0, byteRead)
      byteRead = inputStream.read(buffer)
    }

    // Wrap the downloaded bytes and the file name in a single-row DataFrame.
    val df_create_start_time = System.currentTimeMillis()
    val byteArray = byteArrayOutputStream.toByteArray
    val df = Seq((byteArray, filename)).toDF()
    val df_create_end_time = System.currentTimeMillis()
    println("df_create_time=" + (df_create_end_time - df_create_start_time))

    val df_write_start_time = System.currentTimeMillis()
    out.write(df)
    val df_write_end_time = System.currentTimeMillis()
    println("df_write_time=" + (df_write_end_time - df_write_start_time))
  }

  def initialize(ctx: ProcessContext): Unit = {
  }

  def setProperties(map: Map[String, Any]): Unit = {
    url_str = MapUtil.get(map, key = "url_str").asInstanceOf[String]
    // file_Path = MapUtil.get(map, key = "file_Path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    // Start from an empty list; prepending to null would throw a NullPointerException.
    var descriptor: List[PropertyDescriptor] = List()
    val url_str = new PropertyDescriptor().name("url_str").displayName("URL").defaultValue("").required(true)
    // val file_Path = new PropertyDescriptor().name("file_Path").displayName("File_Path").defaultValue("").required(true)
    descriptor = url_str :: descriptor
    // descriptor = file_Path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = ???

  override def getGroup(): StopGroup = {
    HttpGroup
  }

}
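
The download loop in perform buffers the stream with an explicit while; as a standalone sketch (not part of the commit), the same buffering can be written with Iterator.continually, which keeps the end-of-stream sentinel out of the write path:

// Minimal sketch: read an InputStream fully into a byte array.
// Standalone helper with assumed names; not part of the commit above.
import java.io.{ByteArrayOutputStream, InputStream}

def readAllBytes(in: InputStream, bufferSize: Int = 1024 * 1024): Array[Byte] = {
  val out = new ByteArrayOutputStream()
  val buffer = new Array[Byte](bufferSize)
  Iterator
    .continually(in.read(buffer))   // read() returns -1 at end of stream
    .takeWhile(_ != -1)             // stop before the sentinel
    .foreach(n => out.write(buffer, 0, n))
  out.toByteArray
}

takeWhile drops the -1 before foreach runs, so the sentinel value is never written to the output buffer.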
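
A minimal, hypothetical smoke test of the stop's configuration surface, exercising only the methods defined above; actually running perform needs a piflow JobContext carrying a SparkSession, which this sketch does not construct, and the URL is a placeholder:

import cn.piflow.bundle.http.LoadZipFromUrl

object LoadZipFromUrlExample {
  def main(args: Array[String]): Unit = {
    val stop = new LoadZipFromUrl()

    // "url_str" is the property key declared in getPropertyDescriptor();
    // the URL itself is a placeholder, not taken from the commit.
    stop.setProperties(Map("url_str" -> "http://example.org/archive.zip"))

    // With the file_Path property commented out, exactly one descriptor remains.
    val descriptors = stop.getPropertyDescriptor()
    println(s"declared properties: ${descriptors.size}")
  }
}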