如何使用 Jackson 正确解析流式 JSON?

时间:2016-07-16 22:17:24

标签: java json jackson streaming

我正试图找出一种用 Jackson 解析流式 JSON 的简洁方法。这里的“流式”是指像 TCP 那样,数据在网络上以零散的片段到达,无法保证单次读取就能收到完整的 JSON 数据(也没有消息分帧)。此外,目标是异步执行此操作,这就排除了依赖 Jackson 对 java.io.InputStream 的处理。我想出了一个可行的解决方案(见下面的演示),但并不是特别满意。除了命令式的风格之外,我不喜欢 JsonParser#readValueAsTree 对不完整 JSON 的处理方式。处理字节流时,数据不完整是完全正常的,并不是异常情况,因此在 Jackson 的 API 中看到它为此抛出 java.io.IOException 是很奇怪的(也是不可接受的)。我也研究过使用 Jackson 的 TokenBuffer,但遇到了类似的问题。难道 Jackson 真的不是为处理真正的流式 JSON 而设计的吗?

package com.example.jackson;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Collections.emptyList;

/**
 * Demo of incrementally parsing JSON that arrives in arbitrary byte chunks.
 *
 * <p>The core is {@link #parse(byte[], ObjectMapper)}: it takes the bytes currently available
 * and returns every complete JSON root it could read plus the bytes that could not be parsed yet.
 * The caller is expected to prepend that remainder to the next chunk (see the scenarios).
 */
public class AsyncJsonParsing {
    /** Runs the four demo scenarios in order and prints their outcomes to standard output. */
    public static void main(String[] args) {
        final AsyncJsonParsing parsing = new AsyncJsonParsing();

        parsing.runFirstScenario();
        parsing.runSecondScenario();
        parsing.runThirdScenario();
        parsing.runFourthScenario();
    }

    /** Result of one {@link #parse(byte[], ObjectMapper)} call: parsed roots plus leftover bytes. */
    static final class ParsingOutcome {
        /** Parsed JSON roots, in the order they appeared in the chunk; may be empty. */
        final List<JsonNode> roots;
        /** Bytes that were not turned into a root by this call; may be empty. */
        final byte[] remainder;

        ParsingOutcome(final List<JsonNode> roots, final byte[] remainder) {
            this.roots = roots;
            this.remainder = remainder;
        }
    }

    final byte[] firstMessage = "{\"message\":\"first\"}".getBytes(UTF_8);
    final byte[] secondMessage = "{\"message\":\"second\"}".getBytes(UTF_8);

    final byte[] leadingHalfOfFirstMessage = Arrays.copyOfRange(firstMessage, 0, firstMessage.length / 2);
    final byte[] trailingHalfOfFirstMessage = Arrays.copyOfRange(firstMessage, firstMessage.length / 2, firstMessage.length);

    final byte[] leadingHalfOfSecondMessage = Arrays.copyOfRange(secondMessage, 0, secondMessage.length / 2);
    final byte[] trailingHalfOfSecondMessage = Arrays.copyOfRange(secondMessage, secondMessage.length / 2, secondMessage.length);

    final ObjectMapper mapper = new ObjectMapper();

    /** A single complete message arrives in one chunk. */
    void runFirstScenario() {
        //expectation: remainder = empty array and roots has a single element - parsed firstMessage
        final ParsingOutcome result = parse(firstMessage, mapper);
        report(result);
    }

    /** A single message arrives split across two chunks. */
    void runSecondScenario() {
        //expectation: remainder = leadingHalfOfFirstMessage and roots is empty
        final ParsingOutcome firstResult = parse(leadingHalfOfFirstMessage, mapper);
        report(firstResult);

        //expectation: remainder = empty array and roots has a single element - parsed firstMessage
        final ParsingOutcome secondResult = parse(concat(firstResult.remainder, trailingHalfOfFirstMessage), mapper);
        report(secondResult);
    }

    /** A complete message is followed by the first half of another; the second half arrives later. */
    void runThirdScenario() {
        //expectation: remainder = leadingHalfOfSecondMessage and roots has a single element - parsed firstMessage
        final ParsingOutcome firstResult = parse(concat(firstMessage, leadingHalfOfSecondMessage), mapper);
        report(firstResult);

        //expectation: remainder = empty array and roots has a single element - parsed secondMessage
        final ParsingOutcome secondResult = parse(concat(firstResult.remainder, trailingHalfOfSecondMessage), mapper);
        report(secondResult);
    }

    /** Two complete messages arrive back to back in one chunk. */
    void runFourthScenario() {
        //expectation: remainder = empty array and roots has two elements - parsed firstMessage, followed by parsed secondMessage
        final ParsingOutcome result = parse(concat(firstMessage, secondMessage), mapper);
        report(result);
    }

    /** Prints the remainder (length and raw bytes) and the parsed roots of {@code result}. */
    static void report(final ParsingOutcome result) {
        System.out.printf("Remainder of length %d: %s%n", result.remainder.length, Arrays.toString(result.remainder));
        System.out.printf("Total of %d parsed JSON roots: %s%n", result.roots.size(), result.roots);
    }

    /**
     * Returns a new array holding the bytes of {@code left} followed by the bytes of {@code right}.
     * Neither input array is modified.
     */
    static byte[] concat(final byte[] left, final byte[] right) {
        final byte[] union = Arrays.copyOf(left, left.length + right.length);
        System.arraycopy(right, 0, union, left.length, right.length);
        return union;
    }

    /**
     * Parses as many complete JSON roots as possible out of {@code chunk}.
     *
     * <p>Outcomes:
     * <ul>
     *   <li>the very first root cannot be read: empty root list, and the whole {@code chunk} is
     *       handed back as the remainder;</li>
     *   <li>one or more roots are read: those roots, and the bytes the parser still had buffered
     *       right after the last complete root;</li>
     *   <li>the parser reports end of input before any root: empty root list, empty remainder.</li>
     * </ul>
     *
     * <p>Any {@link IOException} raised while reading is treated as "no further complete root in
     * this chunk"; this method does not distinguish truncated input from malformed input.
     *
     * @param chunk  the bytes currently available (typically previous remainder + newly received bytes)
     * @param mapper supplies the parser factory and the tree codec used by {@code readValueAsTree}
     * @return the parsed roots together with the not-yet-parsed bytes
     */
    static ParsingOutcome parse(final byte[] chunk, final ObjectMapper mapper) {
        final List<JsonNode> roots = new LinkedList<>();
        byte[] remainder = new byte[0];

        // try-with-resources: the parser is closed on every exit path instead of being leaked.
        try (JsonParser parser = mapper.getFactory().createParser(chunk)) {
            JsonNode root = parser.readValueAsTree();
            while (root != null) {
                roots.add(root);
                // Snapshot what is left *before* attempting the next root: if that attempt fails,
                // this snapshot is exactly what must be carried over to the next call.
                remainder = extractRemainder(parser);
                root = parser.readValueAsTree();
            }
        } catch (IOException ignored) {
            // Deliberately not propagated: a chunk ending in the middle of a JSON value is the
            // normal case for streamed input, not an error for the caller.
            if (roots.isEmpty()) {
                // Not even one root could be read, so nothing of the chunk has been consumed.
                return new ParsingOutcome(emptyList(), chunk);
            }
            // Otherwise fall through with the roots read so far and the last snapshot.
        }

        return new ParsingOutcome(roots, remainder);
    }

    /**
     * Copies the bytes the parser currently reports as buffered-but-unconsumed into a new array.
     *
     * <p>NOTE(review): whether {@code releaseBuffered} merely copies those bytes or also marks
     * them as consumed may depend on the Jackson version; {@link #parse} keeps reading from the
     * same parser afterwards, so confirm against the version in use.
     *
     * @param parser the parser to query
     * @return the released bytes, or an empty array if releasing them failed
     */
    static byte[] extractRemainder(final JsonParser parser) {
        try {
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            parser.releaseBuffered(baos);
            return baos.toByteArray();
        } catch (IOException ignored) {
            // Best effort, as in the original demo: report "nothing left" rather than fail.
            return new byte[0];
        }
    }
}

进一步说明一下:(至少在我看来)解析任何流式数据都可以归结为一个简单的函数,它接受一个字节数组并返回一个元组:(1) 一个可能为空的解析结果列表;(2) 剩余的、当前尚无法解析的字节数组。在上面的代码段中,此元组由 ParsingOutcome 的实例表示。

0 个答案:

没有答案