如何交错流(带背压)

时间:2014-12-30 14:35:58

标签: javascript rxjs bacon.js

假设我有两个可能无限的流:

s1 = a..b..c..d..e...
s2 = 1.2.3.4.5.6.7...

我希望合并流,然后使用慢速异步操作映射合并流(例如,在带有fromPromiseflatMapConcat的培根中。

我可以将它们与merge

结合使用
me = a12b3.c45d6.7e...

然后映射

s1 = a..b..c..d..e...
s2 = 1.2.3.4.5.6.7...
me = a12b3.c45d6.7e...
mm = a..1..2..b..3..c..4..5..

如您所见 greedier s2视频流从长远来看会获得优势。 这是不受欢迎的行为


merge 行为不合适,因为我想要某种背压来进行更多交错,“公平”,“循环”合并。 期望行为的几个例子:

s1 = a.....b..............c...
s2 = ..1.2.3..................
mm = a...1...b...2...3....c...

s1 = a.........b..........c...
s2 = ..1.2.3..................
mm = a...1...2...b...3....c...

一种思考方式是s1s2向工作人员发送任务,当时只能处理一项任务。使用mergeflatMapConcat我会得到一个贪婪的任务管理器,但我想要更公平的。


我想找到一个简单而优雅的解决方案。如果它对于任意数量的流很容易通用,那就太好了:

// roundRobinPromiseMap(streams: [Stream a], f: a -> Promise b): Stream b
var mm = roundRobinPromiseMap([s1, s2], slowAsyncFunc);

使用RxJS或其他Rx库的解决方案也很好。


澄清

不是zipAsArray

我不想要:

function roundRobinPromiseMap(streams, f) {
  return Bacon.zipAsArray.apply(null, streams)
    .flatMap(Bacon.fromArray)
    .flatMapConcat(function (x) {
      return Bacon.fromPromise(f(x));
    });
}

比较示例大理石图:

s1  = a.....b..............c.......
s2  = ..1.2.3......................
mm  = a...1...b...2...3....c....... // wanted
zip = a...1...b...2........c...3... // zipAsArray based

是的,我会遇到缓冲问题

...但我也会直截了当地不公平

function greedyPromiseMap(streams, f) {
  Bacon.mergeAll(streams).flatMapConcat(function (x) {
    return Bacon.fromPromise(f(x));
  });
}

大理石图

s1    = a.........b..........c...
s2    = ..1.2.3..................
mm    = a...1...2...b...3....c...
merge = a...1...2...3...b....c...

2 个答案:

答案 0 :(得分:2)

这是一段可能有用的疯狂代码。

它将输入流转换为单个“值”事件流,然后将它们与“发送”事件(以及用于簿记的“结束”事件)合并。然后,使用状态机,它从'value'事件中建立队列,并在'send'事件上调度值。

最初我写了一篇roundRobinThrottle,但我把它转移到了一个要点。

这是一个非常相似的roundRobinPromiseMap。测试中的代码,但不是。

# roundRobinPromiseMap :: (a -> Promise b) -> [EventStream] -> EventStream
roundRobinPromiseMap = (promiser, streams) ->
    # A bus to trigger new sends based on promise fulfillment
    promiseFulfilled = new Bacon.Bus()

    # Merge the input streams into a single, keyed stream
    theStream = Bacon.mergeAll(streams.map((s, idx) ->
        s.map((val) -> {
            type: 'value'
            index: idx
            value: val
        })
    ))
    # Merge in 'end' events
    .merge(Bacon.mergeAll(streams.map((s) ->
        s.mapEnd(-> {
            type: 'end'
        })
    )))
    # Merge in 'send' events that fire when the promise is fulfilled.
    .merge(promiseFulfilled.map({ type: 'send' }))
    # Feed into a state machine that keeps queues and only creates
    # output events on 'send' input events.
    .withStateMachine(
        {
            queues: streams.map(-> [])
            toPush: 0
            ended: 0
        }
        handleState

    )
    # Feed this output to the promiser
    theStream.onValue((value) ->
        Bacon.fromPromise(promiser(value)).onValue(->
            promiseFulfilled.push()
    ))

handleState = (state, baconEvent) ->
    outEvents = []

    if baconEvent.hasValue()
        # Handle a round robin event of 'value', 'send', or 'end'
        outEvents = handleRoundRobinEvent(state, baconEvent.value())
    else
        outEvents = [baconEvent]

    [state, outEvents]

handleRoundRobinEvent = (state, rrEvent) ->
    outEvents = []

    # 'value' : push onto queue
    if rrEvent.type == 'value'
        state.queues[rrEvent.index].push(rrEvent.value)
    # 'send' : send the next value by round-robin selection
    else if rrEvent.type == 'send'
        # Here's a sentinel for empty queues
        noValue = {}
        nextValue = noValue
        triedQueues = 0

        while nextValue == noValue && triedQueues < state.queues.length
            if state.queues[state.toPush].length > 0
                nextValue = state.queues[state.toPush].shift()
            state.toPush = (state.toPush + 1) % state.queues.length
            triedQueues++
        if nextValue != noValue
            outEvents.push(new Bacon.Next(nextValue))
    # 'end': Keep track of ended streams
    else if rrEvent.type == 'end'
        state.ended++

    # End the round-robin stream if all inputs have ended
    if roundRobinEnded(state)
        outEvents.push(new Bacon.End())

    outEvents

roundRobinEnded = (state) ->
    emptyQueues = allEmpty(state.queues)
    emptyQueues && state.ended == state.queues.length

allEmpty = (arrays) ->
    for a in arrays
        return false if a.length > 0
    return true

答案 1 :(得分:1)

这里的核心挑战是了解如何正式化公平。在这个问题中我已经提到了工人的比喻。事实证明,明显的公平标准是选择一个产生比其他事件少的事件的流,或采取更进一步的流:谁生成的流等待的时间更短。

之后,使用指称语义形式化所需的输出是非常简单的: code is on GitHub

我没有时间开发指示性组合子以包含来自 Bacon.js withStateMachine,因此下一步是使用 Bacon.js在JavaScript中重新实现它直接。整个可运行的解决方案是available as a gist

我的想法是用

创建一个状态机
  • 每个流成本和队列状态
  • 流和其他反馈流作为输入

当整个系统的输出被反馈时,我们可以在前一个flatMapped流结束时将下一个事件出列。

为此,我不得不制造一些丑陋的rec组合器

function rec(f) {
  var bus = new Bacon.Bus();
  var result = f(bus);
  bus.plug(result);
  return result;
}

它的类型是(EventStream a -> EventStream a) -> EventStream a - 类型类似于其他递归组合器,例如fix

它可以在更好的系统范围内行为,因为 Bus 打破了取消订阅传播。我们必须努力。

第二个辅助函数是stateMachine,它接受​​一组流并将它们转换为单个状态机。基本上它是.withStateMachine ∘ mergeAll ∘ zipWithIndex

function stateMachine(inputs, initState, f) {
  var mapped = inputs.map(function (input, i) {
    return input.map(function (x) {
      return [i, x];
    })
  });
  return Bacon.mergeAll(mapped).withStateMachine(initState, function (state, p) {
    if (p.hasValue()) {
      p = p.value();
      return f(state, p[0], p[1]);
    } else {
      return [state, p];
    }
  });
}

使用这两个帮助程序,我们可以编写一个不那么复杂的公平调度程序

function fairScheduler(streams, fn) {
  var streamsCount = streams.length;
  return rec(function (res) {
    return stateMachine(append(streams, res), initialFairState(streamsCount), function (state, i, x) {
      // console.log("FAIR: " + JSON.stringify(state), i, x);

      // END event
      if (i == streamsCount && x.end) {
        var additionalCost = new Date().getTime() - x.started;

        // add cost to input stream cost center
        var updatedState = _.extend({}, state, {
          costs: updateArray(
            state.costs,
            x.idx, function (cost) { return cost + additionalCost; }),
        });

        if (state.queues.every(function (q) { return q.length === 0; })) {
          // if queues are empty, set running: false and don't emit any events
          return [_.extend({}, updatedState, { running: false }), []];
        } else {
          // otherwise pick a stream with
          // - non-empty queue
          // - minimal cost
          var minQueueIdx = _.chain(state.queues)
            .map(function (q, i) {
              return [q, i];
            })
            .filter(function (p) {
              return p[0].length !== 0;
            })
            .sortBy(function (p) {
              return state.costs[p[1]];
            })
            .value()[0][1];

          // emit an event from that stream
          return [
            _.extend({}, updatedState, {
              queues: updateArray(state.queues, minQueueIdx, function (q) { return q.slice(1); }),
              running: true,
            }),
            [new Bacon.Next({
              value: state.queues[minQueueIdx][0],
              idx: minQueueIdx,
            })],
          ];
        }
      } else if (i < streamsCount) {
        // event from input stream
        if (state.running) {
          // if worker is running, just enquee the event
          return [
            _.extend({}, state, {
              queues: updateArray(state.queues, i, function (q) { return q .concat([x]); }),
            }),
            [],
          ];
        } else {
          // if worker isn't running, start it right away
          return [
            _.extend({}, state, {
              running: true,
            }),
            [new Bacon.Next({ value: x, idx: i})],
          ]
        }
      } else {
        return [state, []];
      }

    })
    .flatMapConcat(function (x) {
      // map passed thru events,
      // and append special "end" event
      return fn(x).concat(Bacon.once({
        end: true,
        idx: x.idx,
        started: new Date().getTime(),
      }));
    });
  })
  .filter(function (x) {
    // filter out END events
    return !x.end;
  })
  .map(".value"); // and return only value field
}

要点中的其余代码非常简单。