这个问题来自其前身:decoding an http header value
在我的测试断言失败时,我打印出error_message
的以下内容:
Error! Expecting <alternative><media_type_no_parameters><media_type_with_parameters> in header value: "text/html garbage ; charset = \"ISO-8859-5\"" at position: 0
这无益......
获得一个好的语法错误的正确方法是:
Error! token_pair has invalid syntax here:
text/html garbage ; charset = "ISO-8859-5"
^ must be eoi or separator of type ;
请求中的HTTP Content-Type具有以下形式:
type/subtype *( ; param[=param_value]) <eoi>
如果类型和子类型不被引用或用空格分隔,则不引用param,并且param_value既是可选的,也可以是可选的。
除type/subtype
个空格之外或水平制表符可用作空格。 type/subtype
之前可能还有空格。
现在我忽略了HTTP换行或评论的可能性,因为我知道它们已被弃用。
应该有一种类型,一种子类型和零个或多个参数。 type和subtype是HTTP标记,也就是说它们可能不包含分隔符("/\[]<>,;
等)或空格。
因此,以下标题是合法的:
text/html ; charset = "ISO-8859-5"
以下标题是非法的:
text/html garbage ; charset = "ISO-8859-5"
^^^^^^^ illegal - must be either ; or <eoi>
我用来解析这个(看似简单但实际上非常狡猾)协议组件的代码如下所示。
我的代码改编自sehe的幻想回答here (警告,先决条件是谷歌测试和提升)
//#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <gtest/gtest.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <utility>
#include <vector>
#include <string>
#include <iostream>
using token_pair = std::pair<std::string, std::string>;
struct parameter {
std::string name;
std::string value;
bool has_value;
};
struct media_type {
token_pair type_subtype;
std::vector<parameter> params;
};
BOOST_FUSION_ADAPT_STRUCT(parameter, name, value, has_value)
BOOST_FUSION_ADAPT_STRUCT(media_type, type_subtype, params)
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
using namespace std::literals;
template<class Iterator>
struct components
{
components()
{
using qi::ascii::char_;
spaces = char_(" \t");
token = +~char_( "()<>@,;:\\\"/[]?={} \t");
token_pair_rule = token >> '/' >> token;
quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
value = quoted_string | token;
name_only = token >> qi::attr("") >> qi::attr(false);
nvp = token >> '=' >> value >> qi::attr(true);
any_parameter = ';' >> (nvp | name_only);
some_parameters = +any_parameter;
parameters = *any_parameter;
qi::on_error<qi::fail>(
token,
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
);
BOOST_SPIRIT_DEBUG_NODES((token)
(quoted_string)
(value)
(name_only)
(nvp)
(any_parameter)
(parameters)
)
}
protected:
using Skipper = qi::space_type;
Skipper spaces;
qi::rule<Iterator, std::string()> quoted_string, token, value;
qi::rule<Iterator, parameter(), Skipper> nvp, name_only, any_parameter;
qi::rule<Iterator, std::vector<parameter>(), Skipper> parameters, some_parameters;
qi::rule<Iterator, token_pair()> token_pair_rule;
public:
std::string error_message;
protected:
struct ReportError {
// the result type must be explicit for Phoenix
template<typename, typename, typename, typename>
struct result { typedef void type; };
ReportError(std::string& error_message)
: error_message(error_message) {}
// contract the string to the surrounding new-line characters
template<typename Iter>
void operator()(Iter first, Iter last,
Iter error, const qi::info& what) const
{
using namespace std::string_literals;
std::ostringstream ss;
ss << "Error! Expecting "
<< what
<< " in header value: " << std::quoted(std::string(first, last))
<< " at position: " << error - first;
error_message = ss.str();
}
std::string& error_message;
};
const phoenix::function<ReportError> report_error = ReportError(error_message);
};
template<class Iterator>
struct token_grammar
: components<Iterator>
, qi::grammar<Iterator, media_type()>
{
token_grammar() : token_grammar::base_type(media_type_rule)
{
media_type_with_parameters = token_pair_rule >> qi::skip(spaces)[some_parameters];
media_type_no_parameters = token_pair_rule >> qi::attr(std::vector<parameter>()) >> qi::skip(spaces)[qi::eoi];
media_type_rule = qi::eps > (qi::hold[media_type_no_parameters]
| qi::hold[media_type_with_parameters]);
BOOST_SPIRIT_DEBUG_NODES((media_type_with_parameters)
(media_type_no_parameters)
(media_type_rule))
qi::on_error<qi::fail>(
media_type_rule,
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
);
}
private:
using Skipper = typename token_grammar::components::Skipper;
using token_grammar::components::spaces;
using token_grammar::components::token;
using token_grammar::components::token_pair_rule;
using token_grammar::components::value;
using token_grammar::components::any_parameter;
using token_grammar::components::parameters;
using token_grammar::components::some_parameters;
public:
qi::rule<Iterator, media_type()> media_type_no_parameters, media_type_with_parameters, media_type_rule;
};
TEST(spirit_test, test1)
{
token_grammar<std::string::const_iterator> grammar{};
auto test = R"__test(application/json )__test"s;
auto ct = media_type {};
bool r = parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("application", ct.type_subtype.first);
EXPECT_EQ("json", ct.type_subtype.second);
EXPECT_EQ(0, ct.params.size());
ct = {};
test = R"__test(text/html ; charset = "ISO-8859-5")__test"s;
parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("text", ct.type_subtype.first);
EXPECT_EQ("html", ct.type_subtype.second);
ASSERT_EQ(1, ct.params.size());
EXPECT_TRUE(ct.params[0].has_value);
EXPECT_EQ("charset", ct.params[0].name);
EXPECT_EQ("ISO-8859-5", ct.params[0].value);
auto mt = media_type {};
parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_EQ("text", mt.type_subtype.first);
EXPECT_EQ("html", mt.type_subtype.second);
EXPECT_EQ(1, mt.params.size());
//
// Introduce a failure case
//
mt = media_type {};
test = R"__test(text/html garbage ; charset = "ISO-8859-5")__test"s;
r = parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_FALSE(r);
EXPECT_EQ("", grammar.error_message);
}