我想用Boost::Spirit编写一个我认为应该很简单的解析器,却一直没能成功。(我使用Spirit而不是直接使用字符串函数,是因为这对我来说也是一次学习练习。)
要解析的数据采用键值对的形式,其中值本身也可以是键值对。键由字母数字组成(可含下划线,首字符不能是数字);值由字母数字加上`.-_`组成——因此除了普通的字母数字字符串之外,值还可以是`DD-MMM-YYYY`格式的日期(例如`01-Jan-2015`)和浮点数(如`3.1415`)。键和值用`=`分隔;键值对之间用`;`分隔;结构化值用`{`...`}`括起来。目前我先从用户输入中删除所有空格,然后再将其传递给Spirit。
示例输入:
Key1 = Value1; Key2 = { NestedKey1=Alan; NestedKey2 = 43.1232; }; Key3 = 15-Jul-1974 ;
然后我会删除所有空格以提供
Key1=Value1;Key2={NestedKey1=Alan;NestedKey2=43.1232;};Key3=15-Jul-1974;
然后我实际将它传递给Spirit。
当值只是普通值时,我目前的代码工作得很好。但当我开始在输入中加入结构化值时,Spirit会在第一个结构化值之后停止解析。如果只有一个结构化值,变通办法是把它放在输入的末尾……但有时我需要两个或更多结构化值。
以下代码可以在VS2013下编译,并能重现该错误:
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/pair.hpp>
#include <boost/fusion/adapted.hpp>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <string>
typedef std::map<std::string, std::string> ARGTYPE;
#define BOOST_SPIRIT_DEBUG
namespace qi = boost::spirit::qi;
namespace fusion = boost::fusion;
// Qi grammar for a list of key[=value] pairs separated by ';' or '&'.
// The synthesized attribute is ARGTYPE (typedef'd in this file as
// std::map<std::string, std::string>).
// It      - iterator type over the input.
// Skipper - skip parser type used by the skipper-aware rules.
template < typename It, typename Skipper>
struct NestedGrammar : qi::grammar < It, ARGTYPE(), Skipper >
{
// Sequence is the start rule of the grammar.
NestedGrammar() : NestedGrammar::base_type(Sequence)
{
using namespace qi;
// A key: a letter or underscore, followed by any number of letters, digits or underscores.
KeyName = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
// A plain value: one or more letters, digits, '-', '.' or '_'.
Value = +qi::char_("-.a-zA-Z_0-9");
// The "=value" part is optional. A braced value is parsed as a nested Sequence,
// but raw[] exposes the matched source range instead of the nested attribute,
// so the pair's value stays a string.
Pair = KeyName >> -(
'=' >> ('{' >> raw[Sequence] >> '}' | Value)
);
// One Pair, then any number of (separator, Pair). Each separator must be followed
// by another Pair, so a trailing ';' (e.g. right before a closing '}') is not consumed here.
Sequence = Pair >> *((qi::lit(';') | '&') >> Pair);
// Debug-node registration for each rule (see BOOST_SPIRIT_DEBUG).
BOOST_SPIRIT_DEBUG_NODE(KeyName);
BOOST_SPIRIT_DEBUG_NODE(Value);
BOOST_SPIRIT_DEBUG_NODE(Pair);
BOOST_SPIRIT_DEBUG_NODE(Sequence);
}
private:
// KeyName is the only rule declared without a skipper; the others take Skipper.
qi::rule<It, ARGTYPE(), Skipper> Sequence;
qi::rule<It, std::string()> KeyName;
qi::rule<It, std::string(), Skipper> Value;
qi::rule<It, std::pair < std::string, std::string>(), Skipper> Pair;
};
// Runs NestedGrammar over [begin, end) with qi::space as the skipper and
// returns whatever the grammar stored in the map. The result of
// qi::phrase_parse is not inspected.
template <typename Iterator>
ARGTYPE Parse2(Iterator begin, Iterator end)
{
    ARGTYPE parsed;
    NestedGrammar<Iterator, qi::space_type> grammar;

    qi::phrase_parse(begin, end, grammar, qi::space, parsed);

    return parsed;
}
// ARGTYPE is std::map<std::string,std::string>
// Predicate for std::remove_if: true when c is a whitespace character.
// std::isspace has undefined behaviour for negative values other than EOF,
// so the char is converted to unsigned char before classification.
static bool IsSpaceChar(char c)
{
    return std::isspace(static_cast<unsigned char>(c)) != 0;
}

// Removes every whitespace character from a copy of Input, parses what is
// left with Parse2 and assigns the result to Output (replacing any previous
// contents of Output).
void NestedParse(std::string Input, ARGTYPE& Output)
{
    Input.erase(std::remove_if(Input.begin(), Input.end(), IsSpaceChar), Input.end());
    Output = Parse2(Input.begin(), Input.end());
}
int main(int argc, char** argv)
{
std::string Example1, Example2, Example3;
ARGTYPE Out;
Example1 = "Key1=Value1 ; Key2 = 01-Jan-2015; Key3 = 2.7181; Key4 = Johnny";
Example2 = "Key1 = Value1; Key2 = {InnerK1 = one; IK2 = 11-Nov-2011;};";
Example3 = "K1 = V1; K2 = {IK1=IV1; IK2=IV2;}; K3=V3; K4 = {JK1=JV1; JK2=JV2;};";
NestedParse(Example1, Out);
for (ARGTYPE::iterator i = Out.begin(); i != Out.end(); i++)
std::cout << i->first << "|" << i->second << std::endl;
std::cout << "=====" << std::endl;
/* get the following, as expected:
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
*/
NestedParse(Example2, Out);
for (ARGTYPE::iterator i = Out.begin(); i != Out.end(); i++)
std::cout << i->first << "|" << i->second << std::endl;
std::cout << "=====" << std::endl;
/* get the following, as expected:
Key1|Value1
key2|InnerK1=one;IK2=11-Nov-2011
*/
NestedParse(Example3, Out);
for (ARGTYPE::iterator i = Out.begin(); i != Out.end(); i++)
std::cout << i->first << "|" << i->second << std::endl;
/* Only get the first two lines of the expected output:
K1|V1
K2|IK1=IV1;IK2=IV2
K3|V3
K4|JK1=JV1;JK2=JV2
*/
return 0;
}
我不确定问题是出在我对BNF的不了解、对Spirit的不了解,还是(就目前而言)两者都有。
任何帮助表示赞赏。我已阅读过,例如Spirit Qi sequence parsing issues和其中的链接,但我仍然无法弄清楚我做错了什么。
答案 0 :(得分:2)
确实,这恰恰是Spirit擅长的简单语法。
此外,完全没有必要预先去除空白:Spirit已经为此内置了跳过器(skipper)。
不过先回答你明确提出的问题:`Sequence`规则过于复杂。你可以改用列表运算符(`%`):
// List operator: one or more Pair, separated by ';' or '&'.
Sequence = Pair % char_(";&");
现在剩下的问题是:你的序列以`;`结尾,而语法并不接受这个结尾的分隔符,因此`Sequence`和`Value`最终都解析失败。除非你`#define BOOST_SPIRIT_DEBUG`¹并检查调试输出,否则这一点并不明显。
所以要修复它:
// Same list, followed by an optional trailing ';' or '&'; omit[] keeps that
// separator out of the synthesized attribute.
Sequence = Pair % char_(";&") >> -omit[char_(";&")];
Fix Live On Coliru (或with debug info)
打印:
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
=====
Key1|Value1
Key2|InnerK1=one;IK2=11-Nov-2011;
=====
K1|V1
K2|IK1=IV1;IK2=IV2;
K3|V3
K4|JK1=JV1;JK2=JV2;
其实这很简单:只需删除那一行多余的去除空白的代码即可,因为跳过器(skipper)本来就是`qi::space`。
(不过请注意,该skipper并不作用于你的`Value`规则,因此值中不能包含空白,但解析器也不会悄悄地把空白跳过;我想这很可能正是你想要的,只是请留意这一点。)
你真正想要的其实是一个递归的AST,而不是解析成一个扁平的map。
Boost的recursive variant让这变得轻而易举:
namespace ast {
    // A value is either a plain string or a map from key to (recursively) a value;
    // boost::recursive_variant_ marks where the variant refers to itself.
    using Value = boost::make_recursive_variant<
        std::string,
        std::map<std::string, boost::recursive_variant_>
    >::type;

    // A parsed list of key/value pairs.
    using Sequence = std::map<std::string, Value>;
}
要完成这项工作,您只需更改规则的声明属性类型:
// Sequence synthesizes the recursive ast::Sequence map.
qi::rule<It, ast::Sequence(), Skipper> Sequence;
// A pair maps a key to an ast::Value (a plain string or a nested map).
qi::rule<It, std::pair<std::string, ast::Value>(), Skipper> Pair;
// Plain string values.
qi::rule<It, std::string(), Skipper> String;
// Keys; the only rule declared without a skipper.
qi::rule<It, std::string()> KeyName;
规则本身甚至不需要改动。你只需要再写一个小的访问者(visitor)来把AST输出到流中:
static inline std::ostream& operator<<(std::ostream& os, ast::Value const& value) {
struct vis : boost::static_visitor<> {
vis(std::ostream& os, std::string indent = "") : _os(os), _indent(indent) {}
void operator()(std::map<std::string, ast::Value> const& map) const {
_os << "map {\n";
for (auto& entry : map) {
_os << _indent << " " << entry.first << '|';
boost::apply_visitor(vis(_os, _indent+" "), entry.second);
_os << "\n";
}
_os << _indent << "}\n";
}
void operator()(std::string const& s) const {
_os << s;
}
private:
std::ostream& _os;
std::string _indent;
};
boost::apply_visitor(vis(os), value);
return os;
}
现在打印:
map {
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
}
=====
map {
Key1|Value1
Key2|InnerK1 = one; IK2 = 11-Nov-2011;
}
=====
map {
K1|V1
K2|IK1=IV1; IK2=IV2;
K3|V3
K4|JK1=JV1; JK2=JV2;
}
当然,真正的关键在于:现在把`raw[Sequence]`改成单纯的`Sequence`之后,输出变为:
map {
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
}
=====
map {
Key1|Value1
Key2|map {
IK2|11-Nov-2011
InnerK1|one
}
}
=====
map {
K1|V1
K2|map {
IK1|IV1
IK2|IV2
}
K3|V3
K4|map {
JK1|JV1
JK2|JV2
}
}
**Live On Coliru**
//#define BOOST_SPIRIT_DEBUG
#include <boost/variant.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <string>
#include <map>
namespace ast {
    // A value is either a plain string or a map from key to (recursively) a value;
    // boost::recursive_variant_ marks where the variant refers to itself.
    using Value = boost::make_recursive_variant<
        std::string,
        std::map<std::string, boost::recursive_variant_>
    >::type;

    // A parsed list of key/value pairs.
    using Sequence = std::map<std::string, Value>;
}
namespace qi = boost::spirit::qi;
// Qi grammar for a list of key[=value] pairs separated by ';' or '&', where a
// value is either a plain string or a braced, nested list of pairs.
// The synthesized attribute is ast::Sequence.
// It      - iterator type over the input.
// Skipper - skip parser type used by the skipper-aware rules.
template <typename It, typename Skipper>
struct NestedGrammar : qi::grammar <It, ast::Sequence(), Skipper>
{
// Sequence is the start rule of the grammar.
NestedGrammar() : NestedGrammar::base_type(Sequence)
{
using namespace qi;
// A key: a letter or underscore, followed by any number of letters, digits or underscores.
KeyName = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
// A plain string value: one or more letters, digits, '-', '.' or '_'.
String = +qi::char_("-.a-zA-Z_0-9");
// The "=value" part is optional; the value is either a braced nested Sequence
// or a plain String.
Pair = KeyName >> -(
'=' >> ('{' >> Sequence >> '}' | String)
);
// One or more Pair separated by ';' or '&', then an optional trailing
// separator that omit[] keeps out of the attribute.
Sequence = Pair % char_(";&") >> -omit[char_(";&")];
// Debug-node registration for all rules (see BOOST_SPIRIT_DEBUG).
BOOST_SPIRIT_DEBUG_NODES((KeyName) (String) (Pair) (Sequence))
}
private:
// KeyName is the only rule declared without a skipper; the others take Skipper.
qi::rule<It, ast::Sequence(), Skipper> Sequence;
qi::rule<It, std::pair<std::string, ast::Value>(), Skipper> Pair;
qi::rule<It, std::string(), Skipper> String;
qi::rule<It, std::string()> KeyName;
};
// Runs NestedGrammar over [begin, end) with qi::space as the skipper and
// returns the ast::Sequence the grammar filled in. The result of
// qi::phrase_parse is not inspected.
template <typename Iterator>
ast::Sequence DoParse(Iterator begin, Iterator end)
{
    ast::Sequence parsed;
    NestedGrammar<Iterator, qi::space_type> grammar;

    qi::phrase_parse(begin, end, grammar, qi::space, parsed);

    return parsed;
}
static inline std::ostream& operator<<(std::ostream& os, ast::Value const& value) {
struct vis : boost::static_visitor<> {
vis(std::ostream& os, std::string indent = "") : _os(os), _indent(indent) {}
void operator()(std::map<std::string, ast::Value> const& map) const {
_os << "map {\n";
for (auto& entry : map) {
_os << _indent << " " << entry.first << '|';
boost::apply_visitor(vis(_os, _indent+" "), entry.second);
_os << "\n";
}
_os << _indent << "}\n";
}
void operator()(std::string const& s) const {
_os << s;
}
private:
std::ostream& _os;
std::string _indent;
};
boost::apply_visitor(vis(os), value);
return os;
}
// Parses each sample input with DoParse and streams the resulting AST to
// std::cout, followed by a newline.
int main()
{
    std::string const samples[] = {
        "Key1=Value1 ; Key2 = 01-Jan-2015; Key3 = 2.7181; Key4 = Johnny",
        "Key1 = Value1; Key2 = {InnerK1 = one; IK2 = 11-Nov-2011;};",
        "K1 = V1; K2 = {IK1=IV1; IK2=IV2;}; K3=V3; K4 = {JK1=JV1; JK2=JV2;};",
    };

    for (auto const& sample : samples) {
        std::cout << DoParse(sample.begin(), sample.end()) << "\n";
    }
}
¹ 你确实“有”这个定义,但位置不对!它应该放在所有Boost头文件的#include之前。