我有一个应用程序,它接收 TCP 套接字连接,对端会以如下形式发送数据:
n{json}bbbbbbbbbb...
其中 n 是紧随其后的 json 的字节长度;json 的内容类似 {'splitEvery': 5},它决定了我应该如何切分并处理后面可能无限长的字节串。
我想在 Scala 中使用 Akka 处理这个流。我认为 Akka Streams 是合适的工具,但我很难找到一个在流中使用不同处理阶段的示例。大多数流似乎只是一遍又一遍地重复同样的操作,比如这里的 prefixAndTail 示例。它与我想要处理流中 n{json} 部分的方式非常接近,区别在于:我只需要对每个连接执行一次这一步,然后就转入另一个处理“阶段”。
有人能给我指出一个使用具有不同阶段的 Akka 流的示例吗?
答案 0 :(得分:1)
由于块大小取决于流的内容,而所有处理阶段又必须在开始处理流数据之前完成物化(materialize),因此无法直接使用 Source.grouped(chunkSize) 这类便捷方法。我建议先从流的开头剥离元数据(使用 Akka Streams 以外的方式),再把流的其余部分交给 Source.grouped(chunkSize)。
或者,你也可以用状态机对流做 fold/scan,但这样更麻烦:
// Demo: parse `n{json}payload` from a character stream with a state machine driven by `scan`.
implicit val system = ActorSystem("Test")
implicit val materializer = ActorMaterializer()

val input = """17{"splitEvery": 5}aaaaabbbbbccccc"""

def getChunkSize(json: String) = 5 // dummy implementation

/** Parser phases, entered in order: length prefix -> JSON header -> chunked payload. */
sealed trait State
/** Accumulating the decimal digits of the length prefix. */
case class GetLength(number: String) extends State
/** Reading the JSON header; `n` is the number of header characters still to be read. */
case class GetJson(n: Int, json: String) extends State
/** Accumulating payload characters in `s` until `chunkSize` of them are available. */
case class ProcessData(chunkSize: Int, s: String) extends State

// Scan accumulator: the parser state plus the chunk completed by the latest element, if any.
type Out = (State, Option[String])

/**
 * Appends `e` to the pending payload and emits the chunk once it holds `chunkSize` characters.
 *
 * Used for every payload character, including the very first one, so that a chunk size of 1
 * emits on each character instead of never matching the "chunk is full" condition.
 */
def addToChunk(chunkSize: Int, pending: String, e: Char): Out = {
  val chunk = pending + e
  if (chunk.length >= chunkSize) (ProcessData(chunkSize, ""), Some(chunk))
  else (ProcessData(chunkSize, chunk), None)
}

val future = Source.fromIterator(() => input.iterator).
  scan[Out]((GetLength(""), None)) {
    case ((GetLength(s), _), e) if e.isDigit => (GetLength(s + e), None)
    // The first non-digit is already the first JSON character, hence the `- 1`.
    // `s.toInt` throws if the input does not start with a digit.
    case ((GetLength(s), _), e) => (GetJson(s.toInt - 1, e.toString), None)
    // Header complete: `e` is the first payload character.
    case ((GetJson(0, json), _), e) => addToChunk(getChunkSize(json), "", e)
    case ((GetJson(n, json), _), e) => (GetJson(n - 1, json + e), None)
    case ((ProcessData(chunkSize, s), _), e) => addToChunk(chunkSize, s, e)
  }.
  // Keep only the steps that completed a chunk. A trailing chunk shorter than
  // `chunkSize` is still pending when the input ends and is therefore not emitted.
  collect { case (_, Some(s)) => s }.
  runForeach(println)

// Blocks until the stream has finished, then prints the completion value of `runForeach`.
println(Await.result(future, 1.second))
// aaaaa
// bbbbb
// ccccc
// Done
作为记录,下面这种方法行不通:当 _.isDigit 判定失败时,takeWhile 已经消耗掉了迭代器的下一个元素。此外,它仍然需要后续的 JSON 解析阶段:
// NON-WORKING example, kept for the record: it tries to run three consecutive
// streams (length digits, JSON header, payload) over one shared iterator.

// One iterator shared by every Source created through `nextSource`, so each
// new Source continues from wherever the previous one stopped reading.
val it = input.iterator
def nextSource = Source.fromIterator(() => it)
// Adds `stringify` to any Source: runs it and folds all elements into a single
// string, starting from "".
implicit class Stringify[+Out, +Mat](val source: Source[Out, Mat]) {
def stringify = source.runFold("")(_ + _)
}
// Phase 1: read the leading digits and convert them to the header length.
// The element that makes `_.isDigit` fail has already been taken from the shared
// iterator at that point, so the following phases start one character too late.
val future2 = nextSource.
takeWhile(_.isDigit).
stringify.
map(_.toInt).
map { l =>
// Phase 2: read `l` characters as the JSON header and derive the chunk size.
nextSource.
take(l).
stringify.
map(getChunkSize).
map { chunkSize =>
// Phase 3: group whatever remains into strings of `chunkSize` characters and print them.
nextSource.
grouped(chunkSize).
map(_.mkString).
runForeach(println)
}
}
// NOTE(review): the inner pipelines appear to produce nested Futures, so this Await
// presumably only waits for the outermost one - confirm before relying on it.
println(Await.result(future2, 1 second))
// Observed output: every chunk is shifted by one character because of the element
// lost in phase 1.
// aaaab
// bbbbc
// cccc
答案 1 :(得分:0)
我需要处理的格式是:一个 2 字节的长度标头,后面跟着数据。下面这段使用 GraphStage 按长度前缀处理/累积数据的逻辑也许会有帮助。我参考了网上的各种 Akka 文档以及前面答案中提出的思路,用 Java 实现了它。
Java 代码
package com.example;
import akka.Done;
import akka.NotUsed;
import akka.actor.typed.ActorSystem;
import akka.actor.typed.scaladsl.Behaviors;
import akka.stream.*;
import akka.stream.javadsl.Flow;
import akka.stream.javadsl.Sink;
import akka.stream.javadsl.Source;
import akka.stream.stage.GraphStage;
import akka.stream.stage.GraphStageLogic;
import akka.stream.stage.InHandler;
import akka.stream.stage.OutHandler;
import akka.util.ByteString;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletionStage;
//import akka.stream.scaladsl.Sink;
public class TwoByteLengthFramingFlow extends GraphStage<FlowShape<ByteString, ByteString>> {
private final Inlet<ByteString> inlet = Inlet.create("TwoByteLengthFraming.in");
private final Outlet<ByteString> outlet = Outlet.create("TwoByteLengthFraming.out");
FlowShape<ByteString, ByteString> shape = FlowShape.of(inlet, outlet);
public static void main(String[] args) {
ActorSystem system = ActorSystem.create(Behaviors.empty(), "Blah");
byte[] b0 = {0, 4, 'A', 'B', 'C', 'D', 0}; // The last 0 to simulate incomplete length in packet 1
byte[] b1 = {4, 'E', 'F', 'G', 'H', 'A',9};// the last 9 is to simulate stream end but graphstage has one byte in the buffer (simulating incomplete message)
ByteString x0 = ByteString.fromArray(b0);
ByteString x1 = ByteString.fromArray(b1);
List<ByteString> l = new ArrayList<>();
// simulate messages as tcp streams where data is coming in and we need assemble the message from the packets.
l.add(x0);
l.add(x1);
// ByteString[] b =
// {ByteString.fromArray({0x0}])'A','B','C','D',0,4,'E','F','G'};
Graph<FlowShape<ByteString, ByteString>, NotUsed> flowgraph = new TwoByteLengthFramingFlow();
Flow<ByteString, ByteString, NotUsed> flow = Flow.fromGraph(flowgraph);
Sink<ByteString, CompletionStage<Done>> printSink = Sink.foreach(msg -> System.out.println(msg.utf8String()));
Source.from(l)
.via(flow)
.to(printSink)
.run(system);
// Just to see what happens when the sink cancelled terminates the flow
// Source.from(l)
// .via(flow)
// .to(Sink.cancelled())
// .run(system);
}
Flow<ByteString, ByteString, NotUsed> getFlow() {
Graph<FlowShape<ByteString, ByteString>, NotUsed> flowgraph = new TwoByteLengthFramingFlow();
Flow<ByteString, ByteString, NotUsed> flow = Flow.fromGraph(flowgraph);
return flow;
}
@Override
public FlowShape<ByteString, ByteString> shape() {
return shape;
}
@Override
public GraphStageLogic createLogic(Attributes inheritedAttributes) throws Exception {
// TODO Auto-generated method stub
return new GraphStageLogicExtension(shape);
}
private final class GraphStageLogicExtension extends GraphStageLogic {
private final List<ByteString> messages = new ArrayList<>();
protected ByteString buffer = ByteString.emptyByteString();
private GraphStageLogicExtension(Shape shape) {
super(shape);
setHandler(inlet, new InHandler() {
@Override
public void onPush() throws Exception {
System.out.println("onPush()");
// upstream pushed data, our onPush got called
// All incoming bytes are added to the buffer. We concat as there may incomplete messages in the bugger
buffer = buffer.concat(grab(inlet));
// recursively extract as many messages as you can [len+message+len+message....]
// Extract the messages you can and data can remain in the buffer. If you cannot extract len+data as all the bytes aren't available (either 2 bytes of length aren't available
// or 2 bytes of length are available but the equivalent data for that length is not available
// , append that into the buffer and extract what you can. The net time data arrives it will get added to the buffer and we can try again.
extractMessages();
// emit extracted messages
emitChunk();
//
//pull(inlet);
}
@Override
public void onUpstreamFinish() throws Exception {
System.out.println("onUpstreamFinish()");
// upstream signalled its done
if (buffer.size() == 0 && messages.size() == 0) {
// no incomplete message in buffer
completeStage();
}
else {
// There are elements left in buffer, so
// we keep accepting downstream pulls and push from buffer until emptied.
//
// It might be though, that the upstream finished while it was pulled, in which
// case we will not get an onPull from the downstream, because we already had one.
// In that case we need to emit from the buffer.
if (isAvailable(outlet))
emitChunk();
}
}
private void emitChunk() {
System.out.println("emitChunk()");
// If we don't have extracted messaged
if (messages.size() <= 0) {
// if the upstream closed the inlet, we are done
if (isClosed(inlet)) {
completeStage();
}
// we can pull to get more data
else {
System.out.println("pull()");
pull(inlet);
}
}
else {
// we have messages so send one and remove it from the list.
System.out.println("emit()");
emit(outlet, messages.remove(0));
}
}
});
setHandler(outlet, new OutHandler() {
@Override
public void onPull() throws Exception {
System.out.println("onPull()");
//downstream pulled so we got onPull
if (messages.size() > 0) {
// if we have messages we can push them
System.out.println("push()");
push(outlet, messages.remove(0));
}
else
// if we don't have messages to push, we need more dta from upstream, we will do a pull and upstream can react to it and push data and our onPush will get called
System.out.println("pull()");
pull(inlet);
}
@Override
public void onDownstreamFinish() throws Exception {
System.out.println("Downstream Finished");
OutHandler.super.onDownstreamFinish();
}
});
}
protected void extractMessages() {
Tuple2<ByteString, ByteString> lengthDataTuple = buffer.splitAt(2);
int messageLength = getLength(lengthDataTuple._1);
if ((messageLength < 0) || (lengthDataTuple._2.take(messageLength)
.size() != messageLength)) {
return;
}
if (messageLength == 0) {
//maybe its a 0 byte ping message. Let the next stage handle empty bytestring.
messages.add(ByteString.emptyByteString());
}
else {
messages.add(lengthDataTuple._2.take(messageLength));
}
//Update buffer by removing messages that could be extracted
buffer = buffer.drop(2 + messageLength);
// recurse, till we can extract whatever is possible
extractMessages();
}
private int getLength(ByteString header) {
/*
If length was
255 = 0xFF : b[0] = 0xff b[1] = 0x00,
256 = 0x100 : b[0] = 0x00 b[1] = 0x01,
257= 0x101 : b[0] = 0x01 b[1] = 0x01
Basically length div 256 is in b[1] and length mod 256 is in b[0]
*/
byte[] b = header.toArray();
if (b.length == 2) {
return (((b[0]) & 0xFF) << 8) | ((b[1]) & 0xFF);
}
return -1;
}
}
}