c++ - Boost Spirit 词法分析器状态异花授粉

我正在尝试使用词法分析器状态进行上下文相关的解析，但不同的词法分析器状态之间似乎会相互串扰（cross-pollinate）。下面是一个非常基本的例子：

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;

// Token table for the comment-stripping example.
//
// Three token definitions are declared: `ccomment` (comment opener),
// `endcomment` (rest of a comment up to and including its closer) and
// `hello` (the literal word, carrying a std::string attribute).
// `ccomment` and `hello` are registered without naming a lexer state;
// `endcomment` is registered only in the state named "COMMENT".
//
// NOTE(review): nothing in this class changes the lexer state; any switch to
// "COMMENT" has to come from outside (the grammar uses qi::in_state).
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
    strip_comments_tokens()
      : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // Escaped regex for the two literal characters "/*".
        ccomment = "\\/\\*";
        // Any run of characters followed by the literal "*/".
        endcomment = ".*\\*\\/";
        hello = "hello";

        // Tokens recognised when no explicit state name is given.
        this->self.add
            (ccomment)
            (hello);

        // Token recognised only in the "COMMENT" state.
        this->self("COMMENT").add
            (endcomment);
    }

    // Token definitions with the default attribute type.
    lex::token_def<> ccomment, endcomment;
    // Token definition exposing its value as std::string.
    lex::token_def<std::string> hello;
};

// Token-level grammar for the comment-stripping example.
//
// The single rule `start` accepts zero or more repetitions of either
//   * a `ccomment` token followed by an `endcomment` token, where the
//     `endcomment` match is wrapped in qi::in_state("COMMENT"), or
//   * a `hello` token, whose attribute is streamed to std::cout by the
//     attached semantic action.
//
// `tok` is the token-definition object supplying `ccomment`, `endcomment`
// and `hello`.
template <typename Iterator>
struct strip_comments_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    strip_comments_grammar(TokenDef const& tok)
      : strip_comments_grammar::base_type(start)
    {
        // Kleene star over the two alternatives; since zero repetitions are
        // allowed, the rule itself does not require any particular token.
        start =  *(   tok.ccomment
                      >>  qi::in_state("COMMENT")
                      [
                          tok.endcomment
                      ]
              |   tok.hello [ std::cout << _1 ]
        );
    }

    // Start rule; declared without an attribute or skipper.
    qi::rule<Iterator> start;
};


// Tokenizes and parses a fixed sample string with the lexer and grammar
// defined above, and reports when the input was not fully consumed.
//
// Returns 0 when tokenize_and_parse succeeds and the base iterator reached
// the end of the input, 1 otherwise (the unconsumed tail is written to
// std::cerr).
int main(int argc, char* argv[])
{
    // Command-line arguments are not used by this example.
    (void)argc;
    (void)argv;

    typedef std::string::iterator base_iterator_type;

    typedef
        lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
    lexer_type;

    typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;

    strip_comments_tokens<lexer_type> strip_comments;           // Our lexer
    strip_comments_grammar<iterator_type> g (strip_comments);   // Our parser

    std::string str("hello/*hello*/hello");
    base_iterator_type first = str.begin();

    bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);

    // The start rule is a Kleene star, so `r` alone cannot reveal a parse
    // that stopped early; also check how far the base iterator advanced.
    // NOTE(review): the lexer may read ahead of the parser, so `first` is
    // presumably an upper bound on what the grammar consumed - confirm.
    if (!r || first != str.end())
    {
        std::cerr << "\nParsing stopped early; remaining input: '"
                  << std::string(first, str.end()) << "'\n";
        return 1;
    }

    return 0;
}

我希望输入

"hello/*hello*/hello"

被切分为 token 序列 hello ccomment endcomment hello。但实际情况是输入被切分为 hello ccomment hello，因此语法解析无法继续。如果您将输入更改为

"hello/*anything else*/hello"

一切都按预期工作。

有任何想法吗？

最佳答案

您从未修改过词法分析器的状态，所以它始终处于 "INITIAL" 状态。

设置词法分析器状态应该在词法分析器阶段完成(根据我的经验和大量实验，没有可靠的方法从解析器阶段获得反馈)。

因此，您需要升级到 actor_lexer 并将语义操作附加到添加到词法分析器表中的 token_defs:

// Token type over the base iterator, then the actor_lexer built on it;
// actor_lexer is the lexer variant used here so that semantic actions can be
// attached to token definitions.
typedef lex::lexertl::token<base_iterator_type> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;

和

// Tokens registered without a state name: matching the comment opener runs a
// lexer semantic action that sets the lexer state to "COMMENT".
this->self +=
     ccomment [ lex::_state = "COMMENT" ]
   | hello;

// Token registered in the "COMMENT" state: matching the comment closer sets
// the lexer state back to "INITIAL".
this->self("COMMENT") +=
    endcomment [ lex::_state = "INITIAL" ];

话虽如此，我认为直接完全跳过这些 token 要容易得多。如果您确实想了解如何使用词法分析器状态来实现跳过，请参阅:
Boost.Spirit SQL grammar/lexer failure

我建议使用 lex::_pass = lex::pass_flags::pass_ignore 的 Simplify And Profit 方法:
Troubles with boost::spirit::lex & whitespace
how to get rid of escape character in a token with spirit::lex?

这是我的看法:

Live On Coliru
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp> // for the parser expression *strip_comments.hello

#include <iostream>
#include <string>

namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;

// Token table that drops C-style comments inside the lexer itself.
//
// A whole comment is matched by one token definition (`ccomment`) whose
// semantic action sets lex::_pass to pass_ignore; `hello` is the only other
// token registered. No named lexer states are used.
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer> {
    strip_comments_tokens()
        : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // Opener, arbitrary content, closer - all in a single pattern.
        // NOTE(review): `.*` looks greedy, so with several comments in the
        // input this would presumably span from the first opener to the last
        // closer - confirm against the lexertl regex semantics.
        ccomment = "\\/\\*.*\\*\\/";
        hello    = "hello"; // why not "."?

        this->self +=
             ccomment [ lex::_pass = lex::pass_flags::pass_ignore ]
          // IDEA: | lex::token_def<char>(".") // to just accept anything
           | hello
           ;
    }

    // Comment token; declared with lex::omit as its attribute type.
    lex::token_def<lex::omit>   ccomment;
    // Word token exposing its value as std::string.
    lex::token_def<std::string> hello;
};

// Lexes a fixed sample string, parses it as a sequence of `hello` tokens
// collected into one string, and prints either the collected text or the
// input remaining from `first` on failure.
int main() {
    typedef std::string::const_iterator base_iterator_type;
    typedef lex::lexertl::actor_lexer<
        lex::lexertl::token<base_iterator_type/*, boost::mpl::vector<char, std::string>, boost::mpl::false_*/>
    > lexer_type;

    strip_comments_tokens<lexer_type> strip_comments; // Our lexer

    std::string const str("hello/*hello*/hello");
    std::string stripped;

    base_iterator_type first = str.begin();
    bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, *strip_comments.hello, stripped);

    if (r)
        std::cout << "\nStripped: '" << stripped << "'\n";
    else
        std::cout << "Failed: '" << std::string(first, str.end()) << "'\n";
}
关于c++ - Boost Spirit 词法分析器状态异花授粉，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/27705982/