stages of process
This commit is contained in:
parent
ab3a86c6c3
commit
f3391ddcf1
|
@ -0,0 +1,58 @@
|
|||
package cn.piflow
|
||||
|
||||
import java.io.File
|
||||
|
||||
import scala.collection.mutable.{Map => MMap}
|
||||
|
||||
object FlowElement {
|
||||
def fromFile(file: File): FlowElement = {
|
||||
null;
|
||||
}
|
||||
|
||||
def saveFile(flowElement: FlowElement, file: File): Unit = {
|
||||
}
|
||||
}
|
||||
|
||||
class FlowElement {
|
||||
def build(): Flow = {
|
||||
null;
|
||||
}
|
||||
}
|
||||
|
||||
class SparkProcessElement {
|
||||
def build(): SparkProcess = {
|
||||
null;
|
||||
}
|
||||
}
|
||||
|
||||
trait FlowElementManager {
|
||||
def list(): Seq[(String, FlowElement)];
|
||||
|
||||
def get(name: String): Option[FlowElement];
|
||||
|
||||
def add(name: String, flowJson: FlowElement): Unit;
|
||||
|
||||
def delete(name: String): Unit;
|
||||
}
|
||||
|
||||
class InMemoryFlowElementManager extends FlowElementManager {
|
||||
val items = MMap[String, FlowElement]();
|
||||
|
||||
override def list(): Seq[(String, FlowElement)] = items.toSeq;
|
||||
|
||||
override def get(name: String): Option[FlowElement] = items.get(name);
|
||||
|
||||
override def delete(name: String) = items - name;
|
||||
|
||||
override def add(name: String, flowJson: FlowElement) {
|
||||
items(name) = flowJson;
|
||||
}
|
||||
}
|
||||
|
||||
class SqlFlowElementManager /* extends FlowJsonManager */ {
|
||||
|
||||
}
|
||||
|
||||
class FileSystemFlowElementManager {
|
||||
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
package cn.piflow
|
||||
|
||||
import cn.piflow.util.Logging
|
||||
|
||||
import scala.collection.mutable.{ArrayBuffer, Map => MMap}
|
||||
|
||||
trait Event {
|
||||
|
@ -24,7 +25,7 @@ trait EventHandler {
|
|||
}
|
||||
|
||||
trait EventEmiter {
|
||||
def fire(event: Event, args: Any): Unit;
|
||||
def fire(event: Event, args: Any = None): Unit;
|
||||
|
||||
def on(event: Event, handler: EventHandler): Unit;
|
||||
}
|
||||
|
@ -40,7 +41,7 @@ class EventEmiterImpl extends EventEmiter with Logging {
|
|||
logger.debug(s"listening on $event, listener: $handler");
|
||||
}
|
||||
|
||||
def fire(event: Event, args: Any = None): Unit = {
|
||||
def fire(event: Event, args: Any): Unit = {
|
||||
logger.debug(s"fired event: $event, args: $args");
|
||||
if (listeners.contains(event)) {
|
||||
for (listener <- listeners(event)) {
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package cn.piflow
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
|
||||
import cn.piflow.util.Logging
|
||||
import cn.piflow.util.{IdGenerator, Logging}
|
||||
import org.quartz.Trigger.CompletedExecutionInstruction
|
||||
import org.quartz.impl.StdSchedulerFactory
|
||||
import org.quartz.{Trigger => QuartzTrigger, _}
|
||||
|
@ -13,8 +11,21 @@ import scala.collection.mutable.{ArrayBuffer, Map => MMap}
|
|||
/**
|
||||
* Created by bluejoe on 2018/5/2.
|
||||
*/
|
||||
class Flow {
|
||||
val triggers = ArrayBuffer[Trigger]();
|
||||
|
||||
trait Flow {
|
||||
def getProcessNames(): Seq[String];
|
||||
|
||||
def addProcess(name: String, process: Process): Flow;
|
||||
|
||||
def addTrigger(processName: String, trigger: Trigger): Flow;
|
||||
|
||||
def getProcess(name: String): Process;
|
||||
|
||||
def getTriggers(processName: String): Seq[Trigger];
|
||||
}
|
||||
|
||||
class FlowImpl extends Flow {
|
||||
val triggers = MMap[String, ArrayBuffer[Trigger]]();
|
||||
val processes = MMap[String, Process]();
|
||||
|
||||
def addProcess(name: String, process: Process) = {
|
||||
|
@ -22,21 +33,22 @@ class Flow {
|
|||
this;
|
||||
};
|
||||
|
||||
def addTrigger(trigger: Trigger) = {
|
||||
triggers += trigger;
|
||||
def addTrigger(processName: String, trigger: Trigger) = {
|
||||
val processTriggers = triggers.getOrElseUpdate(processName, ArrayBuffer[Trigger]());
|
||||
processTriggers += trigger;
|
||||
this;
|
||||
}
|
||||
|
||||
def getProcess(name: String) = processes(name);
|
||||
|
||||
def getTriggers() = triggers.toSeq;
|
||||
def getTriggers(processName: String) = triggers.getOrElse(processName, ArrayBuffer[Trigger]()).toSeq;
|
||||
|
||||
override def getProcessNames(): Seq[String] = processes.map(_._1).toSeq;
|
||||
}
|
||||
|
||||
object Runner {
|
||||
val idgen = new AtomicInteger();
|
||||
|
||||
def run(flow: Flow, args: Map[String, Any] = Map()): FlowExecution = {
|
||||
new FlowExecutionImpl("" + idgen.incrementAndGet(), flow, args);
|
||||
new FlowExecutionImpl(flow, args);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,8 +59,6 @@ trait FlowExecution {
|
|||
|
||||
def start(starts: String*);
|
||||
|
||||
def getContext(): ExecutionContext;
|
||||
|
||||
def getFlow(): Flow;
|
||||
|
||||
def stop();
|
||||
|
@ -56,10 +66,6 @@ trait FlowExecution {
|
|||
def getRunningProcesses(): Seq[(String, String)];
|
||||
}
|
||||
|
||||
trait Process {
|
||||
def run(pc: ProcessContext);
|
||||
}
|
||||
|
||||
trait Context {
|
||||
def get(key: String): Any;
|
||||
|
||||
|
@ -73,32 +79,23 @@ trait Context {
|
|||
put(m.runtimeClass.getName, value);
|
||||
}
|
||||
|
||||
trait ExecutionContext extends Context with EventEmiter {
|
||||
trait FlowExecutionContext extends Context with EventEmiter {
|
||||
def getFlow(): Flow;
|
||||
|
||||
def runProcess(processName: String): ProcessExecution;
|
||||
|
||||
def getFlowExecution(): FlowExecution;
|
||||
|
||||
def scheduleProcessRepeatly(cronExpr: String, processName: String): Unit;
|
||||
}
|
||||
|
||||
class ProcessContext(executionContext: ExecutionContext) extends Context {
|
||||
val context = MMap[String, Any]();
|
||||
|
||||
override def get(key: String): Any = {
|
||||
if (context.contains(key))
|
||||
context(key);
|
||||
else
|
||||
executionContext.get(key);
|
||||
};
|
||||
|
||||
override def put(key: String, value: Any): this.type = {
|
||||
context(key) = value;
|
||||
this;
|
||||
};
|
||||
|
||||
def getExecutionContext(): ExecutionContext = executionContext;
|
||||
}
|
||||
|
||||
class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
||||
class FlowExecutionImpl(flow: Flow, args: Map[String, Any])
|
||||
extends FlowExecution with Logging {
|
||||
val id = "flow_excution_" + IdGenerator.getNextId[FlowExecution];
|
||||
|
||||
val execution = this;
|
||||
val executionContext = createContext();
|
||||
|
||||
def start(starts: String*): Unit = {
|
||||
//set context
|
||||
args.foreach { (en) =>
|
||||
|
@ -106,7 +103,11 @@ class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
|||
};
|
||||
|
||||
//activates all triggers
|
||||
triggers.foreach(_.activate(executionContext));
|
||||
flow.getProcessNames().foreach { name =>
|
||||
flow.getTriggers(name).foreach { trigger =>
|
||||
trigger.activate(name, executionContext);
|
||||
}
|
||||
}
|
||||
|
||||
quartzScheduler.start();
|
||||
|
||||
|
@ -128,7 +129,8 @@ class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
|||
};
|
||||
}
|
||||
|
||||
val executionContext = new EventEmiterImpl() with ExecutionContext {
|
||||
private def createContext(): FlowExecutionContext = {
|
||||
new EventEmiterImpl() with FlowExecutionContext {
|
||||
//listens on LaunchProcess
|
||||
this.on(LaunchProcess(), new EventHandler() {
|
||||
override def handle(event: Event, args: Any): Unit = {
|
||||
|
@ -140,6 +142,10 @@ class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
|||
|
||||
def get(key: String): Any = context(key);
|
||||
|
||||
def runProcess(processName: String): ProcessExecution = {
|
||||
new ProcessExecutionImpl(processName, flow.getProcess(processName), executionContext);
|
||||
}
|
||||
|
||||
def put(key: String, value: Any) = {
|
||||
context(key) = value;
|
||||
this;
|
||||
|
@ -170,11 +176,13 @@ class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
|||
}
|
||||
|
||||
override def getFlow(): Flow = flow;
|
||||
|
||||
override def getFlowExecution(): FlowExecution = execution;
|
||||
};
|
||||
}
|
||||
|
||||
val quartzScheduler = StdSchedulerFactory.getDefaultScheduler();
|
||||
quartzScheduler.getContext.put("executionContext", executionContext);
|
||||
val triggers = flow.getTriggers();
|
||||
val listeners = ArrayBuffer[FlowExecutionListener]();
|
||||
|
||||
quartzScheduler.getListenerManager.addTriggerListener(new TriggerListener {
|
||||
|
@ -208,8 +216,6 @@ class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
|||
}
|
||||
});
|
||||
|
||||
override def getContext() = executionContext;
|
||||
|
||||
override def getId(): String = id;
|
||||
|
||||
override def addListener(listener: FlowExecutionListener): Unit =
|
||||
|
@ -217,21 +223,3 @@ class FlowExecutionImpl(id: String, flow: Flow, args: Map[String, Any])
|
|||
|
||||
override def getFlow(): Flow = flow;
|
||||
}
|
||||
|
||||
class ProcessAsQuartzJob extends Job with Logging {
|
||||
override def execute(context: JobExecutionContext): Unit = {
|
||||
val map = context.getJobDetail.getJobDataMap;
|
||||
val processName = map.get("processName").asInstanceOf[String];
|
||||
val executionContext = context.getScheduler.getContext.get("executionContext").asInstanceOf[ExecutionContext];
|
||||
try {
|
||||
executionContext.getFlow().getProcess(processName)
|
||||
.run(new ProcessContext(executionContext));
|
||||
context.setResult(true);
|
||||
}
|
||||
catch {
|
||||
case e: Throwable =>
|
||||
e.printStackTrace();
|
||||
throw new JobExecutionException(s"failed to execute process: $processName", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,20 +1,20 @@
|
|||
package cn.piflow.io
|
||||
|
||||
import cn.piflow.{DataSink, ProcessContext, _}
|
||||
import cn.piflow.{DataSink, ProcessExecutionContext, _}
|
||||
import org.apache.spark.sql._
|
||||
|
||||
case class Console(nlimit: Int = 20) extends DataSink {
|
||||
override def save(data: DataFrame, ctx: ProcessContext): Unit = {
|
||||
override def save(data: DataFrame, ctx: ProcessExecutionContext): Unit = {
|
||||
data.show(nlimit);
|
||||
}
|
||||
}
|
||||
|
||||
case class TextFile(path: String, format: String = FileFormat.TEXT) extends DataSource with DataSink {
|
||||
override def load(ctx: ProcessContext): DataFrame = {
|
||||
override def load(ctx: ProcessExecutionContext): DataFrame = {
|
||||
ctx.get[SparkSession].read.format(format).load(path).asInstanceOf[DataFrame];
|
||||
}
|
||||
|
||||
override def save(data: DataFrame, ctx: ProcessContext): Unit = {
|
||||
override def save(data: DataFrame, ctx: ProcessExecutionContext): Unit = {
|
||||
data.write.format(format).save(path);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
package cn.piflow
|
||||
|
||||
import cn.piflow.util.{IdGenerator, Logging}
|
||||
import org.quartz._
|
||||
|
||||
import scala.collection.mutable.{ArrayBuffer, Map => MMap}
|
||||
|
||||
trait Process {
|
||||
def onPrepare(pec: ProcessExecutionContext): Unit;
|
||||
|
||||
def onCommit(pec: ProcessExecutionContext): Unit;
|
||||
|
||||
def onRollback(pec: ProcessExecutionContext): Unit;
|
||||
|
||||
def onFail(errorStage: ProcessStage, cause: Throwable, pec: ProcessExecutionContext): Unit;
|
||||
}
|
||||
|
||||
abstract class LazyProcess extends Process with Logging {
|
||||
def onPrepare(pec: ProcessExecutionContext): Unit = {
|
||||
logger.warn(s"onPrepare={}, process: $this");
|
||||
}
|
||||
|
||||
def onCommit(pec: ProcessExecutionContext): Unit;
|
||||
|
||||
def onRollback(pec: ProcessExecutionContext): Unit = {
|
||||
logger.warn(s"onRollback={}, process: $this");
|
||||
}
|
||||
|
||||
def onFail(errorStage: ProcessStage, cause: Throwable, pec: ProcessExecutionContext): Unit = {}
|
||||
}
|
||||
|
||||
//TODO: one ProcessExecution with multiple RUNs
|
||||
trait ProcessExecution {
|
||||
def getId(): String;
|
||||
|
||||
def start();
|
||||
|
||||
def getProcessName(): String;
|
||||
|
||||
def getProcess(): Process;
|
||||
|
||||
def getStage(): ProcessStage;
|
||||
|
||||
def handleError(jee: JobExecutionException): Unit;
|
||||
}
|
||||
|
||||
trait ProcessExecutionContext extends Context {
|
||||
def getProcessExecution(): ProcessExecution;
|
||||
|
||||
def setStage(stage: ProcessStage): Unit;
|
||||
|
||||
def getStage(): ProcessStage;
|
||||
|
||||
def setErrorHandler(handler: ErrorHandler): Unit;
|
||||
}
|
||||
|
||||
class ProcessExecutionContextImpl(processExecution: ProcessExecution, executionContext: FlowExecutionContext)
|
||||
extends ProcessExecutionContext {
|
||||
val stages = ArrayBuffer[ProcessStage]();
|
||||
var errorHandler: ErrorHandler = Noop();
|
||||
|
||||
def setStage(stage: ProcessStage) = stages += stage;
|
||||
|
||||
val context = MMap[String, Any]();
|
||||
|
||||
def getProcessExecution() = processExecution;
|
||||
|
||||
def getStage(): ProcessStage = stages.last;
|
||||
|
||||
override def get(key: String): Any = {
|
||||
if (context.contains(key))
|
||||
context(key);
|
||||
else
|
||||
executionContext.get(key);
|
||||
};
|
||||
|
||||
override def put(key: String, value: Any): this.type = {
|
||||
context(key) = value;
|
||||
this;
|
||||
};
|
||||
|
||||
override def setErrorHandler(handler: ErrorHandler): Unit = errorHandler = handler;
|
||||
}
|
||||
|
||||
class ProcessAsQuartzJob extends Job with Logging {
|
||||
override def execute(context: JobExecutionContext): Unit = {
|
||||
val map = context.getJobDetail.getJobDataMap;
|
||||
val processName = map.get("processName").asInstanceOf[String];
|
||||
val executionContext = context.getScheduler.getContext.get("executionContext").asInstanceOf[FlowExecutionContext];
|
||||
|
||||
val pe = executionContext.runProcess(processName);
|
||||
try {
|
||||
pe.start();
|
||||
context.setResult(true);
|
||||
}
|
||||
catch {
|
||||
case e => {
|
||||
val jee = new JobExecutionException(s"failed to execute process: $processName", e);
|
||||
logger.error {
|
||||
val stage = pe.getStage();
|
||||
s"failed to execute process: $processName, stage: $stage, cause: $e"
|
||||
};
|
||||
pe.handleError(jee);
|
||||
throw jee;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class ProcessExecutionImpl(processName: String, process: Process, executionContext: FlowExecutionContext)
|
||||
extends ProcessExecution with Logging {
|
||||
val id = "process_excution_" + IdGenerator.getNextId[ProcessExecution];
|
||||
val processExecutionContext = createContext();
|
||||
|
||||
override def getId(): String = id;
|
||||
|
||||
override def start(): Unit = {
|
||||
try {
|
||||
processExecutionContext.setStage(PrepareStart());
|
||||
process.onPrepare(processExecutionContext);
|
||||
processExecutionContext.setStage(PrepareComplete());
|
||||
}
|
||||
catch {
|
||||
case e =>
|
||||
try {
|
||||
logger.warn(s"onPrepare() failed: $e");
|
||||
processExecutionContext.setStage(RollbackStart());
|
||||
process.onRollback(processExecutionContext);
|
||||
processExecutionContext.setStage(RollbackComplete());
|
||||
|
||||
throw e;
|
||||
}
|
||||
catch {
|
||||
case e =>
|
||||
logger.warn(s"onRollback() failed: $e");
|
||||
process.onFail(RollbackStart(), e, processExecutionContext);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
processExecutionContext.setStage(CommitStart());
|
||||
process.onCommit(processExecutionContext);
|
||||
processExecutionContext.setStage(CommitComplete());
|
||||
}
|
||||
catch {
|
||||
case e =>
|
||||
logger.warn(s"onCommit() failed: $e");
|
||||
process.onFail(CommitStart(), e, processExecutionContext);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private def createContext() =
|
||||
new ProcessExecutionContextImpl(this, executionContext);
|
||||
|
||||
override def getProcessName(): String = processName;
|
||||
|
||||
override def getProcess(): Process = process;
|
||||
|
||||
override def handleError(jee: JobExecutionException): Unit = processExecutionContext.errorHandler.handle(jee);
|
||||
|
||||
override def getStage(): ProcessStage = processExecutionContext.getStage();
|
||||
}
|
||||
|
||||
trait ErrorHandler {
|
||||
def handle(jee: JobExecutionException);
|
||||
}
|
||||
|
||||
case class Noop() extends ErrorHandler {
|
||||
def handle(jee: JobExecutionException): Unit = {
|
||||
}
|
||||
}
|
||||
|
||||
case class Retry() extends ErrorHandler {
|
||||
def handle(jee: JobExecutionException): Unit = {
|
||||
jee.setRefireImmediately(true);
|
||||
}
|
||||
}
|
||||
|
||||
case class Abort() extends ErrorHandler {
|
||||
def handle(jee: JobExecutionException): Unit = {
|
||||
jee.setUnscheduleFiringTrigger(true);
|
||||
}
|
||||
}
|
||||
|
||||
case class Fail() extends ErrorHandler {
|
||||
def handle(jee: JobExecutionException): Unit = {
|
||||
jee.setUnscheduleAllTriggers(true);
|
||||
}
|
||||
}
|
||||
|
||||
trait ProcessStage {
|
||||
def getName(): String;
|
||||
}
|
||||
|
||||
case class PrepareStart() extends ProcessStage {
|
||||
def getName(): String = this.getClass.getSimpleName;
|
||||
}
|
||||
|
||||
case class PrepareComplete() extends ProcessStage {
|
||||
def getName(): String = this.getClass.getSimpleName;
|
||||
}
|
||||
|
||||
case class CommitStart() extends ProcessStage {
|
||||
def getName(): String = this.getClass.getSimpleName;
|
||||
}
|
||||
|
||||
case class CommitComplete() extends ProcessStage {
|
||||
def getName(): String = this.getClass.getSimpleName;
|
||||
}
|
||||
|
||||
case class RollbackStart() extends ProcessStage {
|
||||
def getName(): String = this.getClass.getSimpleName;
|
||||
}
|
||||
|
||||
case class RollbackComplete() extends ProcessStage {
|
||||
def getName(): String = this.getClass.getSimpleName;
|
||||
}
|
|
@ -3,9 +3,7 @@
|
|||
*/
|
||||
package cn.piflow
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
|
||||
import cn.piflow.util.Logging
|
||||
import cn.piflow.util.{IdGenerator, Logging}
|
||||
import org.apache.spark.sql._
|
||||
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
||||
import org.apache.spark.sql.types.StructType
|
||||
|
@ -13,28 +11,39 @@ import org.apache.spark.sql.types.StructType
|
|||
import scala.collection.JavaConversions
|
||||
import scala.collection.mutable.{ArrayBuffer, Map => MMap}
|
||||
|
||||
class SparkETLProcess extends Process with Logging {
|
||||
val ends = ArrayBuffer[(ProcessContext) => Unit]();
|
||||
val idgen = new AtomicInteger();
|
||||
class SparkProcess extends Process with Logging {
|
||||
val ends = ArrayBuffer[(ProcessExecutionContext) => Unit]();
|
||||
|
||||
def onPrepare(pec: ProcessExecutionContext) = {
|
||||
ends.foreach(_.apply(pec));
|
||||
}
|
||||
|
||||
override def onCommit(pec: ProcessExecutionContext): Unit = {
|
||||
|
||||
}
|
||||
|
||||
override def onRollback(pec: ProcessExecutionContext): Unit = {
|
||||
|
||||
}
|
||||
|
||||
override def onFail(errorStage: ProcessStage, cause: Throwable, pec: ProcessExecutionContext): Unit = {
|
||||
|
||||
override def run(pc: ProcessContext): Unit = {
|
||||
ends.foreach(_.apply(pc));
|
||||
}
|
||||
|
||||
abstract class CachedStream extends Stream {
|
||||
val id = idgen.incrementAndGet();
|
||||
val id = "" + IdGenerator.getNextId[Stream];
|
||||
val context = MMap[String, Any]();
|
||||
var cache: Option[DataFrame] = None;
|
||||
|
||||
override def getId(): Int = id;
|
||||
override def getId(): String = id;
|
||||
|
||||
def put(key: String, value: Any) = context(key) = value;
|
||||
|
||||
override def get(key: String): Any = context.get(key);
|
||||
|
||||
def produce(ctx: ProcessContext): DataFrame;
|
||||
def produce(ctx: ProcessExecutionContext): DataFrame;
|
||||
|
||||
override def feed(ctx: ProcessContext): DataFrame = {
|
||||
override def feed(ctx: ProcessExecutionContext): DataFrame = {
|
||||
if (!cache.isDefined) {
|
||||
cache = Some(produce(ctx));
|
||||
}
|
||||
|
@ -44,7 +53,7 @@ class SparkETLProcess extends Process with Logging {
|
|||
|
||||
def loadStream(streamSource: DataSource): Stream = {
|
||||
return new CachedStream() {
|
||||
override def produce(ctx: ProcessContext): DataFrame = {
|
||||
override def produce(ctx: ProcessExecutionContext): DataFrame = {
|
||||
logger.debug {
|
||||
val oid = this.getId();
|
||||
s"loading stream[_->$oid], source: $streamSource";
|
||||
|
@ -57,7 +66,7 @@ class SparkETLProcess extends Process with Logging {
|
|||
|
||||
def writeStream(streamSink: DataSink, stream: Stream): Unit = {
|
||||
ends += {
|
||||
(ctx: ProcessContext) => {
|
||||
(ctx: ProcessExecutionContext) => {
|
||||
val input = stream.feed(ctx);
|
||||
logger.debug {
|
||||
val schema = input.schema;
|
||||
|
@ -76,7 +85,7 @@ class SparkETLProcess extends Process with Logging {
|
|||
|
||||
def transform(transformer: DataTransformer, streams: Map[String, Stream]): Stream = {
|
||||
return new CachedStream() {
|
||||
override def produce(ctx: ProcessContext): DataFrame = {
|
||||
override def produce(ctx: ProcessExecutionContext): DataFrame = {
|
||||
val inputs = streams.map(x => (x._1, x._2.feed(ctx)));
|
||||
logger.debug {
|
||||
val schemas = inputs.map(_._2.schema);
|
||||
|
@ -92,33 +101,33 @@ class SparkETLProcess extends Process with Logging {
|
|||
}
|
||||
|
||||
trait Stream {
|
||||
def getId(): Int;
|
||||
def getId(): String;
|
||||
|
||||
def feed(ctx: ProcessContext): DataFrame;
|
||||
def feed(ctx: ProcessExecutionContext): DataFrame;
|
||||
|
||||
def get(key: String): Any;
|
||||
}
|
||||
|
||||
|
||||
trait DataSource {
|
||||
def load(ctx: ProcessContext): DataFrame;
|
||||
def load(ctx: ProcessExecutionContext): DataFrame;
|
||||
}
|
||||
|
||||
trait DataTransformer {
|
||||
def transform(data: Map[String, DataFrame], ctx: ProcessContext): DataFrame;
|
||||
def transform(data: Map[String, DataFrame], ctx: ProcessExecutionContext): DataFrame;
|
||||
}
|
||||
|
||||
trait DataTransformer1N1 extends DataTransformer {
|
||||
def transform(data: DataFrame, ctx: ProcessContext): DataFrame;
|
||||
def transform(data: DataFrame, ctx: ProcessExecutionContext): DataFrame;
|
||||
|
||||
def transform(dataset: Map[String, DataFrame], ctx: ProcessContext): DataFrame = {
|
||||
def transform(dataset: Map[String, DataFrame], ctx: ProcessExecutionContext): DataFrame = {
|
||||
val first = dataset.head;
|
||||
transform(first._2, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
trait DataSink {
|
||||
def save(data: DataFrame, ctx: ProcessContext): Unit;
|
||||
def save(data: DataFrame, ctx: ProcessExecutionContext): Unit;
|
||||
}
|
||||
|
||||
trait FunctionLogic {
|
||||
|
@ -126,7 +135,7 @@ trait FunctionLogic {
|
|||
}
|
||||
|
||||
case class DoMap(func: FunctionLogic, targetSchema: StructType = null) extends DataTransformer1N1 {
|
||||
def transform(data: DataFrame, ctx: ProcessContext): DataFrame = {
|
||||
def transform(data: DataFrame, ctx: ProcessExecutionContext): DataFrame = {
|
||||
val encoder = RowEncoder {
|
||||
if (targetSchema == null) {
|
||||
data.schema;
|
||||
|
@ -141,7 +150,7 @@ case class DoMap(func: FunctionLogic, targetSchema: StructType = null) extends D
|
|||
}
|
||||
|
||||
case class DoFlatMap(func: FunctionLogic, targetSchema: StructType = null) extends DataTransformer1N1 {
|
||||
def transform(data: DataFrame, ctx: ProcessContext): DataFrame = {
|
||||
def transform(data: DataFrame, ctx: ProcessExecutionContext): DataFrame = {
|
||||
val encoder = RowEncoder {
|
||||
if (targetSchema == null) {
|
||||
data.schema;
|
||||
|
@ -157,7 +166,7 @@ case class DoFlatMap(func: FunctionLogic, targetSchema: StructType = null) exten
|
|||
}
|
||||
|
||||
case class ExecuteSQL(sql: String) extends DataTransformer with Logging {
|
||||
def transform(dataset: Map[String, DataFrame], ctx: ProcessContext): DataFrame = {
|
||||
def transform(dataset: Map[String, DataFrame], ctx: ProcessExecutionContext): DataFrame = {
|
||||
|
||||
dataset.foreach { x =>
|
||||
val tableName = "table_" + x._1;
|
||||
|
@ -171,11 +180,11 @@ case class ExecuteSQL(sql: String) extends DataTransformer with Logging {
|
|||
}
|
||||
catch {
|
||||
case e: Throwable =>
|
||||
throw new SqlExecutionErrorException(e, sql);
|
||||
throw new SqlExecutionErrorException(sql, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class SqlExecutionErrorException(cause: Throwable, sql: String)
|
||||
class SqlExecutionErrorException(sql: String, cause: Throwable)
|
||||
extends RuntimeException(s"sql execution error, sql: $sql", cause) {
|
||||
}
|
|
@ -3,17 +3,20 @@ package cn.piflow
|
|||
import scala.collection.mutable.{Map => MMap}
|
||||
|
||||
trait Trigger {
|
||||
def activate(context: ExecutionContext): Unit;
|
||||
|
||||
def getTriggeredProcesses(): Seq[String];
|
||||
/**
|
||||
* start current trigger on given process
|
||||
*
|
||||
* @param processName
|
||||
* @param context
|
||||
*/
|
||||
def activate(processName: String, context: FlowExecutionContext): Unit;
|
||||
}
|
||||
|
||||
/**
|
||||
* start process while dependent processes completed
|
||||
*/
|
||||
object DependencyTrigger {
|
||||
def declareDependency(processName: String, dependentProcesses: String*): Trigger = new Trigger() {
|
||||
override def activate(executionContext: ExecutionContext): Unit = {
|
||||
class DependencyTrigger(dependentProcesses: String*) extends Trigger {
|
||||
override def activate(processName: String, executionContext: FlowExecutionContext): Unit = {
|
||||
val listener = new EventHandler {
|
||||
val completed = MMap[String, Boolean]();
|
||||
dependentProcesses.foreach { processName =>
|
||||
|
@ -34,32 +37,24 @@ object DependencyTrigger {
|
|||
executionContext.on(ProcessCompleted(dependency), listener);
|
||||
}
|
||||
}
|
||||
|
||||
override def getTriggeredProcesses(): Seq[String] = Seq(processName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* start processes repeatedly
|
||||
*/
|
||||
object TimerTrigger {
|
||||
def cron(cronExpr: String, processNames: String*): Trigger = new Trigger() {
|
||||
override def activate(executionContext: ExecutionContext): Unit = {
|
||||
class TimerTrigger(cronExpr: String, processNames: String*) extends Trigger {
|
||||
override def activate(processName: String, executionContext: FlowExecutionContext): Unit = {
|
||||
processNames.foreach { processName =>
|
||||
executionContext.scheduleProcessRepeatly(processName, cronExpr);
|
||||
}
|
||||
}
|
||||
|
||||
override def getTriggeredProcesses(): Seq[String] = processNames;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* start processes while Events happen
|
||||
*/
|
||||
object EventTrigger {
|
||||
def listen(event: Event, processNames: String*): Trigger = new Trigger() {
|
||||
override def activate(executionContext: ExecutionContext): Unit = {
|
||||
class EventTrigger(event: Event, processNames: String*) extends Trigger {
|
||||
override def activate(processName: String, executionContext: FlowExecutionContext): Unit = {
|
||||
processNames.foreach { processName =>
|
||||
executionContext.on(event, new EventHandler() {
|
||||
override def handle(event: Event, args: Any): Unit = {
|
||||
|
@ -68,7 +63,4 @@ object EventTrigger {
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
override def getTriggeredProcesses(): Seq[String] = processNames;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
package cn.piflow.util
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
|
||||
import scala.collection.mutable.{Map => MMap}
|
||||
|
||||
trait IdGenerator {
|
||||
def generateId(): Int;
|
||||
}
|
||||
|
||||
object IdGenerator {
|
||||
val map = MMap[String, IdGenerator]();
|
||||
|
||||
def getNextId[T](implicit manifest: Manifest[T]) = map.getOrElseUpdate(manifest.runtimeClass.getName,
|
||||
new IdGenerator() {
|
||||
val ai = new AtomicInteger();
|
||||
|
||||
def generateId(): Int = ai.incrementAndGet();
|
||||
})
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
import cn.piflow._
|
||||
import org.junit.{Assert, Test}
|
||||
|
||||
class FlowElementManagerTest {
|
||||
@Test
|
||||
def testRAM(): Unit = {
|
||||
_testMgr(new InMemoryFlowElementManager().asInstanceOf[FlowElementManager]);
|
||||
}
|
||||
|
||||
@Test
|
||||
def testSql(): Unit = {
|
||||
_testMgr(new SqlFlowElementManager().asInstanceOf[FlowElementManager]);
|
||||
}
|
||||
|
||||
@Test
|
||||
def testDir(): Unit = {
|
||||
_testMgr(new FileSystemFlowElementManager().asInstanceOf[FlowElementManager]);
|
||||
}
|
||||
|
||||
def _testMgr(man: FlowElementManager): Unit = {
|
||||
//clear all first
|
||||
man.list().foreach(x => man.delete(x._1));
|
||||
|
||||
Assert.assertEquals(0, man.list().size);
|
||||
man.add("test", new FlowElement());
|
||||
Assert.assertEquals(1, man.list().size);
|
||||
Assert.assertEquals("test", man.list().head._1);
|
||||
Assert.assertNotNull(man.get("test"));
|
||||
Assert.assertNull(man.get("test2"));
|
||||
man.delete("test");
|
||||
Assert.assertEquals(0, man.list().size);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
import java.io.File
|
||||
|
||||
import cn.piflow.FlowElement
|
||||
import org.junit.Test
|
||||
|
||||
class FlowElementTest {
|
||||
@Test
|
||||
def test1(): Unit = {
|
||||
val f = new File("./test1.json");
|
||||
val flowJson = new FlowElement();
|
||||
FlowElement.saveFile(flowJson, f);
|
||||
val flowJson2 = FlowElement.fromFile(f);
|
||||
}
|
||||
|
||||
}
|
|
@ -2,21 +2,22 @@ import java.io.{File, FileInputStream, FileOutputStream}
|
|||
import java.util.Date
|
||||
|
||||
import cn.piflow._
|
||||
import cn.piflow.io.{Console, FileFormat, TextFile}
|
||||
import org.apache.commons.io.{FileUtils, IOUtils}
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.junit.Test
|
||||
|
||||
class FlowTest {
|
||||
private def runFlow(processes: Map[String, Process]) {
|
||||
val flow = new Flow();
|
||||
val flow: Flow = new FlowImpl();
|
||||
processes.foreach(en => flow.addProcess(en._1, en._2));
|
||||
|
||||
flow.addProcess("PrintMessage", new PrintMessage());
|
||||
|
||||
flow.addTrigger(DependencyTrigger.declareDependency("CopyTextFile", "CleanHouse"));
|
||||
flow.addTrigger(DependencyTrigger.declareDependency("CountWords", "CopyTextFile"));
|
||||
flow.addTrigger(DependencyTrigger.declareDependency("PrintCount", "CountWords"));
|
||||
flow.addTrigger(TimerTrigger.cron("0/5 * * * * ? ", "PrintMessage"));
|
||||
flow.addTrigger("CopyTextFile", new DependencyTrigger("CleanHouse"));
|
||||
flow.addTrigger("CountWords", new DependencyTrigger("CopyTextFile"));
|
||||
flow.addTrigger("PrintCount", new DependencyTrigger("CountWords"));
|
||||
flow.addTrigger("PrintMessage", new TimerTrigger("0/5 * * * * ? "));
|
||||
|
||||
val spark = SparkSession.builder.master("local[4]")
|
||||
.getOrCreate();
|
||||
|
@ -41,26 +42,73 @@ class FlowTest {
|
|||
runFlow(Map(
|
||||
"CleanHouse" -> new CleanHouse(),
|
||||
"CopyTextFile" -> new Process() {
|
||||
def run(pc: ProcessContext): Unit = {
|
||||
override def onPrepare(pec: ProcessExecutionContext): Unit =
|
||||
throw new RuntimeException("this is a bad process!");
|
||||
}
|
||||
|
||||
override def onRollback(pec: ProcessExecutionContext): Unit = ???
|
||||
|
||||
override def onFail(errorStage: ProcessStage, cause: Throwable, pec: ProcessExecutionContext): Unit = ???
|
||||
|
||||
override def onCommit(pec: ProcessExecutionContext): Unit = ???
|
||||
},
|
||||
"CountWords" -> new CountWords(),
|
||||
"PrintCount" -> new PrintCount()));
|
||||
}
|
||||
|
||||
@Test
|
||||
def test2() {
|
||||
def testSparkProcess() {
|
||||
runFlow(Map(
|
||||
"CleanHouse" -> new CleanHouse(),
|
||||
"CopyTextFile" -> new CopyTextFile(),
|
||||
"CountWords" -> SparkETLTest.createProcessCountWords(),
|
||||
"PrintCount" -> SparkETLTest.createProcessPrintCount()));
|
||||
"CountWords" -> createProcessCountWords(),
|
||||
"PrintCount" -> createProcessPrintCount()));
|
||||
}
|
||||
|
||||
val SCRIPT_1 =
|
||||
"""
|
||||
function (row) {
|
||||
return $.Row(row.get(0).replaceAll("[\\x00-\\xff]|,|。|:|.|“|”|?|!| ", ""));
|
||||
}
|
||||
""";
|
||||
val SCRIPT_2 =
|
||||
"""
|
||||
function (row) {
|
||||
var arr = $.Array();
|
||||
var str = row.get(0);
|
||||
var len = str.length;
|
||||
for (var i = 0; i < len - 1; i++) {
|
||||
arr.add($.Row(str.substring(i, i + 2)));
|
||||
}
|
||||
|
||||
return arr;
|
||||
}
|
||||
""";
|
||||
|
||||
def createProcessCountWords() = {
|
||||
val processCountWords = new SparkProcess();
|
||||
val s1 = processCountWords.loadStream(TextFile("./out/honglou.txt", FileFormat.TEXT));
|
||||
val s2 = processCountWords.transform(DoMap(ScriptEngine.logic(SCRIPT_1)), s1);
|
||||
val s3 = processCountWords.transform(DoFlatMap(ScriptEngine.logic(SCRIPT_2)), s2);
|
||||
val s4 = processCountWords.transform(ExecuteSQL(
|
||||
"select value, count(*) count from table_0 group by value order by count desc"), s3);
|
||||
|
||||
processCountWords.writeStream(TextFile("./out/wordcount", FileFormat.JSON), s4);
|
||||
processCountWords;
|
||||
}
|
||||
|
||||
def createProcessPrintCount() = {
|
||||
val processPrintCount = new SparkProcess();
|
||||
val s1 = processPrintCount.loadStream(TextFile("./out/wordcount", FileFormat.JSON));
|
||||
val s2 = processPrintCount.transform(ExecuteSQL(
|
||||
"select value from table_0 order by count desc"), s1);
|
||||
|
||||
processPrintCount.writeStream(Console(40), s2);
|
||||
processPrintCount;
|
||||
}
|
||||
}
|
||||
|
||||
class CountWords extends Process {
|
||||
def run(pc: ProcessContext): Unit = {
|
||||
class CountWords extends LazyProcess {
|
||||
override def onPrepare(pec: ProcessExecutionContext): Unit = {
|
||||
val spark = SparkSession.builder.master("local[4]")
|
||||
.getOrCreate();
|
||||
import spark.implicits._
|
||||
|
@ -69,34 +117,55 @@ class CountWords extends Process {
|
|||
.flatMap(s => s.zip(s.drop(1)).map(t => "" + t._1 + t._2))
|
||||
.groupBy("value").count.sort($"count".desc);
|
||||
|
||||
count.write.json("./out/wordcount");
|
||||
val tmpfile = File.createTempFile(this.getClass.getSimpleName, "");
|
||||
pec.put("tmpfile", tmpfile);
|
||||
|
||||
count.write.json(tmpfile.getAbsolutePath);
|
||||
spark.close();
|
||||
}
|
||||
|
||||
override def onCommit(pec: ProcessExecutionContext): Unit = {
|
||||
pec.get("tmpfile").asInstanceOf[File].renameTo(new File("./out/wordcount"));
|
||||
}
|
||||
|
||||
override def onRollback(pec: ProcessExecutionContext): Unit = {
|
||||
pec.get("tmpfile").asInstanceOf[File].delete();
|
||||
}
|
||||
}
|
||||
|
||||
class PrintMessage extends Process {
|
||||
def run(pc: ProcessContext): Unit = {
|
||||
class PrintMessage extends LazyProcess {
|
||||
def onCommit(pc: ProcessExecutionContext): Unit = {
|
||||
println("*****hello******" + new Date());
|
||||
}
|
||||
}
|
||||
|
||||
class CleanHouse extends Process {
|
||||
def run(pc: ProcessContext): Unit = {
|
||||
class CleanHouse extends LazyProcess {
|
||||
def onCommit(pc: ProcessExecutionContext): Unit = {
|
||||
FileUtils.deleteDirectory(new File("./out/wordcount"));
|
||||
FileUtils.deleteQuietly(new File("./out/honglou.txt"));
|
||||
}
|
||||
}
|
||||
|
||||
class CopyTextFile extends Process {
|
||||
def run(pc: ProcessContext): Unit = {
|
||||
class CopyTextFile extends LazyProcess {
|
||||
override def onPrepare(pec: ProcessExecutionContext): Unit = {
|
||||
val is = new FileInputStream(new File("/Users/bluejoe/testdata/honglou.txt"));
|
||||
val os = new FileOutputStream(new File("./out/honglou.txt"));
|
||||
val tmpfile = File.createTempFile(this.getClass.getSimpleName, "");
|
||||
pec.put("tmpfile", tmpfile);
|
||||
val os = new FileOutputStream(tmpfile);
|
||||
IOUtils.copy(is, os);
|
||||
}
|
||||
|
||||
override def onCommit(pec: ProcessExecutionContext): Unit = {
|
||||
pec.get("tmpfile").asInstanceOf[File].renameTo(new File("./out/honglou.txt"));
|
||||
}
|
||||
|
||||
override def onRollback(pec: ProcessExecutionContext): Unit = {
|
||||
pec.get("tmpfile").asInstanceOf[File].delete();
|
||||
}
|
||||
}
|
||||
|
||||
class PrintCount extends Process {
|
||||
def run(pc: ProcessContext): Unit = {
|
||||
class PrintCount extends LazyProcess {
|
||||
def onCommit(pc: ProcessExecutionContext): Unit = {
|
||||
val spark = SparkSession.builder.master("local[4]")
|
||||
.getOrCreate();
|
||||
import spark.implicits._
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
/**
|
||||
* Created by bluejoe on 2018/5/6.
|
||||
*/
|
||||
|
||||
import java.io.File
|
||||
|
||||
import cn.piflow._
|
||||
import cn.piflow.io.{Console, FileFormat, TextFile}
|
||||
import org.apache.commons.io.FileUtils
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.junit.Test
|
||||
|
||||
object SparkETLTest {
|
||||
val SCRIPT_1 =
|
||||
"""
|
||||
function (row) {
|
||||
return $.Row(row.get(0).replaceAll("[\\x00-\\xff]|,|。|:|.|“|”|?|!| ", ""));
|
||||
}
|
||||
""";
|
||||
val SCRIPT_2 =
|
||||
"""
|
||||
function (row) {
|
||||
var arr = $.Array();
|
||||
var str = row.get(0);
|
||||
var len = str.length;
|
||||
for (var i = 0; i < len - 1; i++) {
|
||||
arr.add($.Row(str.substring(i, i + 2)));
|
||||
}
|
||||
|
||||
return arr;
|
||||
}
|
||||
""";
|
||||
|
||||
def createProcessCountWords() = {
|
||||
val processCountWords = new SparkETLProcess();
|
||||
val s1 = processCountWords.loadStream(TextFile("./out/honglou.txt", FileFormat.TEXT));
|
||||
val s2 = processCountWords.transform(DoMap(ScriptEngine.logic(SCRIPT_1)), s1);
|
||||
val s3 = processCountWords.transform(DoFlatMap(ScriptEngine.logic(SCRIPT_2)), s2);
|
||||
val s4 = processCountWords.transform(ExecuteSQL(
|
||||
"select value, count(*) count from table_0 group by value order by count desc"), s3);
|
||||
|
||||
processCountWords.writeStream(TextFile("./out/wordcount", FileFormat.JSON), s4);
|
||||
processCountWords;
|
||||
}
|
||||
|
||||
def createProcessPrintCount() = {
|
||||
val processPrintCount = new SparkETLProcess();
|
||||
val s1 = processPrintCount.loadStream(TextFile("./out/wordcount", FileFormat.JSON));
|
||||
val s2 = processPrintCount.transform(ExecuteSQL(
|
||||
"select value from table_0 order by count desc"), s1);
|
||||
|
||||
processPrintCount.writeStream(Console(40), s2);
|
||||
processPrintCount;
|
||||
}
|
||||
}
|
||||
|
||||
class SparkETLTest {
|
||||
@Test
|
||||
def test1(): Unit = {
|
||||
FileUtils.deleteDirectory(new File("./out/wordcount"));
|
||||
|
||||
val ctx = new ProcessContext(null);
|
||||
ctx.put[SparkSession](SparkSession.builder.master("local[4]")
|
||||
.getOrCreate());
|
||||
|
||||
SparkETLTest.createProcessCountWords().run(ctx);
|
||||
SparkETLTest.createProcessPrintCount().run(ctx);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue