使用 Boost.Spirit 完整解码 HTTP 标头值

本文介绍了用提升精神充分解码http标头值的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！问题描述再次，我发现自己达到了提振精神。再次，我发现自己被它击败了。 HTTP标头值采用一般形式： text / html; q = 1.0，text / *; q = 0.8，image / gif; q = 0.6，image / jpeg; q = 0.6，image / *; q = 0.5，* / * q = 0.1 value * OWS [; * OWS名称* OWS [= * OWS possible_quoted_value] * OWS [...]] * OWS [，<另一个值> ...] 因此在我心目中，此标题解码为： value [0]： text / html params： name：q value：1.0 value [1]： text / * params： name：q value：0.8 ... 等等。我相信任何人谁知道如何，这里的boost :: spirit :: qi语法是微不足道的。我恳求你的帮助。例如，下面是解码 Content-Type 头的代码大纲，它限制为 type / subtype ，任何数量的参数形式< sp> ; < sp> token = token | quoted_string template< class Iter> void parse（ContentType& ct，Iter first，Iter last） { ct.mutable_type（） - > append（to_lower（consume_token（first，last））） consume_lit（first，last，'/'）; ct.mutable_subtype（） - > append（to_lower（consume_token（first，last）））; while（first！= last）{ skipwhite（first，last）; if（consume_char_if（first，last，';'）） { auto p = ct.add_parameters（）; skipwhite（first，last）; p-> set_name（to_lower（consume_token（first，last））））; skipwhite（first，last）; if（consume_char_if（first，last，'='）） { skipwhite（first，last）; p-> set_value（consume_token_or_quoted（first，last））; } else { //这个参数没有值} } else if（consume_char_if（first，last，'，'） { //通常我们应该得到下一个值标记这里，但在Content-Type的情况下 //我们必须barf throw std :: runtime_error（invalid使用;在Content-Type）; } } } ContentType& populate（ContentType& ct，const std :: string& header_value） { parse（ct，header_value.begin（），header_value.end（））; return ct; } 解决方案作为OP 发布的并进行审核。没有必要指定 void（）。事实上，在这种情况下，最好使用 qi :: unused_type ，这是没有声明属性类型的默认规则。如果您不希望公开属性，则不需要 char _ 。不需要将每个字符解析器包含在 rule 。这伤害性能。最好保持原表达式树不被评估，因为Qi可以更好地优化解析器表达式，编译器可以内联更多。此外，Qi没有属性上的移动语义，因此，避免冗余规则消除了在包含规则中连接的子属性的冗余副本。示例替代拼写（谨慎，请参阅将解析器分配到自动变量） auto CR = qi :: lit（'\r' ）; auto LF = qi :: lit（'\\\'）; auto CRLF = qi :: lit（\r\\\）; auto HT = qi :: lit（'\t'）; auto SP = qi :: lit（''）; auto LWS = qi :: copy（-CRLF> +（SP | HT））; // deepcopy UPALPHA = char _（'A'，'Z'）; LOALPHA = char _（'a'，'z'）; ALPHA = UPALPHA | LOALPHA; DIGIT = char 
_（'0'，'9'）; // CTL = char_（0，31）| char_（127）; TEXT = char _（\t\x20-\x7e\x80-\xff）; 由于您不必使用 char _ ，您也没有使用 qi :: omit [] 杀掉属性。当您在一个Qi域表达式模板中时，原始字符串/ char字面值被隐式包装在 qi :: lit 你可以简单的像 quoted_pair = omit [char _（'\\'）]> char_; quoted_string = omit [char_（'''']]>> *（qdtext | quoted_pair）>>省略[char_（''）]; 只是 quoted_pair ='\\'>> char_; quoted_string =''>> *（qdtext | quoted_pair）>>'''; 而不是使用所有的时间，只需声明与船长的规则。现在，您可以简化 nvp = token>>省略[* SP]>省略['=']>>省略[* SP]>>值; any_parameter = omit [* SP]>>省略[char_（';'）]>>省略[* SP]>> （nvp | name_only）; content_type_rule = type_subtype_rule>> * any_parameter; 只是 nvp = token>> '='>>值; any_parameter =';'>> （nvp | name_only）; content_type_rule = type_subtype_rule>> qi :: skip（spaces）[* any_parameter]; 请注意，没有船长声明的规则的任何子规则调用 implicitly lexeme ：提升精神船长问题最近的编译器+提升版本使BOOST_FUSION_ADAPT_STRUCT更简单，使用 decltype 简化的结果很多少噪音： //＃define BOOST_SPIRIT_DEBUG #include< boost / spirit / include / qi.hpp> #include< boost / fusion / include / adapted.hpp> 结构参数{ boost :: optional< std :: string>名称; std :: string value; }; struct type_subtype { std :: string type; std :: string subtype; }; struct content_type { type_subtype type; std :: vector< parameter>参数}; BOOST_FUSION_ADAPT_STRUCT（type_subtype，type，subtype） BOOST_FUSION_ADAPT_STRUCT（content_type，type，params） template< class Iterator> struct token_grammar：qi :: grammar< Iterator，content_type（）> { token_grammar（）：token_grammar :: base_type（content_type_rule） {使用qi :: ascii :: char_; spaces = char_（''）; token = +〜char_（（）<> @，;：\\\/ []？= {} \t）; quoted_string =' >> *（'\\'>> char_ |〜char_（''））>>'''; value = quoted_string |令牌 type_subtype_rule = token>> '/'>>令牌 name_only = token; nvp = token>> '='>>值; any_parameter =';'>> （nvp | name_only）; content_type_rule = type_subtype_rule>> qi :: skip（spaces）[* any_parameter]; BOOST_SPIRIT_DEBUG_NODES（（nvp）（any_parameter）（content_type_rule）（quoted_string）（token）（value）（type_subtype_rule））} private：使用Skipper = qi :: space_type; 船长空间; qi :: rule< Iterator，binary_parameter（），Skipper> 
nvp; qi :: rule< Iterator，parameter（），Skipper> any_parameter; qi :: rule< Iterator，content_type（）> content_type_rule; // lexemes qi :: rule< Iterator，std :: string（）> quoted_string，token，value; qi :: rule< Iterator，type_subtype（）> type_subtype_rule; qi :: rule< Iterator，unary_parameter（）> name_only; }; 查看 Live on Coliru （具有相同的测试用例） BONUS 我想在这种情况下使用更简单的AST。通过使用 qi :: attr 注入一些属性值，您可以避免使用boost :: variant 和/或甚至避免boost :: optional a>：结构参数{ bool have_name; std :: string name; std :: string value; }; struct type_subtype { std :: string type; std :: string subtype; }; struct content_type { type_subtype type; std :: vector< parameter>参数}; BOOST_FUSION_ADAPT_STRUCT（参数，have_name，名称，值） BOOST_FUSION_ADAPT_STRUCT（type_subtype，type，subtype） BOOST_FUSION_ADAPT_STRUCT（content_type，type，params） namespace qi = boost :: spirit :: qi; template< class Iterator> struct token_grammar：qi :: grammar< Iterator，content_type（）> { token_grammar（）：token_grammar :: base_type（content_type_rule） {使用qi :: ascii :: char_; spaces = char_（''）; token = +〜char_（（）<> @，;：\\\/ []？= {} \t）; quoted_string =' >> *（'\\'>> char_ |〜char_（''））>>'''; value = quoted_string |令牌 type_subtype_rule = token>> '/'>>令牌 name_only = qi :: attr（false）>> qi :: attr（）>>令牌 nvp = qi :: attr（true）>> token>> '='>>值; any_parameter =';'>> （nvp | name_only）; content_type_rule = type_subtype_rule>> qi :: skip（spaces）[* any_parameter]; BOOST_SPIRIT_DEBUG_NODES（（nvp）（any_parameter）（content_type_rule）（quoted_string）（token）（value）（type_subtype_rule））} private：使用Skipper = qi :: space_type; 船长空间; qi :: rule< Iterator，parameter（），Skipper> nvp，name_only，any_parameter; qi :: rule< Iterator，content_type（）> content_type_rule; // lexemes qi :: rule< Iterator，std :: string（）> quoted_string，token，value; qi :: rule< Iterator，type_subtype（）> type_subtype_rule; }; Once again, I find myself reaching for boost spirit. 
Once again I find myself defeated by it. An HTTP header value takes the general form: text/html; q=1.0, text/*; q=0.8, image/gif; q=0.6, image/jpeg; q=0.6, image/*; q=0.5, */*; q=0.1 i.e. value *OWS [; *OWS name *OWS [= *OWS possibly_quoted_value] *OWS [...]] *OWS [ , <another value> ...] So in my mind, this header decodes to: value[0]: text/html params: name : q value : 1.0 value[1]: text/* params: name : q value : 0.8 ... and so on. I am certain that to anyone who knows how, the boost::spirit::qi syntax for this is trivial. I humbly ask for your assistance. For example, here's the outline of the code which decodes the Content-Type header, which is limited to one value of the form type/subtype, with any number of parameters of the form <sp> ; <sp> token=token|quoted_string: template<class Iter>void parse(ContentType& ct, Iter first, Iter last){ ct.mutable_type()->append(to_lower(consume_token(first, last))); consume_lit(first, last, '/'); ct.mutable_subtype()->append(to_lower(consume_token(first, last))); while (first != last) { skipwhite(first, last); if (consume_char_if(first, last, ';')) { auto p = ct.add_parameters(); skipwhite(first, last); p->set_name(to_lower(consume_token(first, last))); skipwhite(first, last); if (consume_char_if(first, last, '=')) { skipwhite(first, last); p->set_value(consume_token_or_quoted(first, last)); } else { // no value on this parameter } } else if (consume_char_if(first, last, ',')) { // normally we should get the next value-token here but in the case of Content-Type // we must barf throw std::runtime_error("invalid use of ; in Content-Type"); } }}ContentType& populate(ContentType& ct, const std::string& header_value){ parse(ct, header_value.begin(), header_value.end()); return ct;} 解决方案 I've taken the code as posted by OP and given it a review. There's no need to specify void(). 
In fact it's preferable to use qi::unused_type in such cases, which is what rules will default to if no attribute type is declared. There is no need for char_ if you don't wish to expose the attribute. Use lit instead. There is no need to wrap every char parser in a rule. That hurts performance. It's best to leave the proto expression tree un-evaluated as long as possible so Qi can optimize parser expressions more, and the compiler can inline more. Also, Qi doesn't have move semantics on attributes, so avoiding redundant rules eliminates redundant copies of sub-attributes that get concatenated in the containing rules. Sample alternative spelling (caution, see "Assigning parsers to auto variables"): auto CR = qi::lit('\r');auto LF = qi::lit('\n');auto CRLF = qi::lit("\r\n");auto HT = qi::lit('\t');auto SP = qi::lit(' ');auto LWS = qi::copy(-CRLF >> +(SP | HT)); // deepcopyUPALPHA = char_('A', 'Z');LOALPHA = char_('a', 'z');ALPHA = UPALPHA | LOALPHA;DIGIT = char_('0', '9');//CTL = char_(0, 31) | char_(127);TEXT = char_("\t\x20-\x7e\x80-\xff"); Since you didn't have to use char_, you also don't have to kill the attribute using qi::omit[]. When you are in a Qi domain expression template, raw string/char literals are implicitly wrapped in a qi::lit, so you can simplify things like quoted_pair = omit[char_('\\')] >> char_;quoted_string = omit[char_('"')] >> *(qdtext | quoted_pair) >> omit[char_('"')]; to just quoted_pair = '\\' >> char_;quoted_string = '"' >> *(qdtext | quoted_pair) >> '"'; Instead of spelling out skipping spaces with omit[*SP] all the time, just declare the rule with a skipper. 
Now, you can simplify nvp = token >> omit[*SP] >> omit['='] >> omit[*SP] >> value;any_parameter = omit[*SP] >> omit[char_(';')] >> omit[*SP] >> (nvp | name_only);content_type_rule = type_subtype_rule >> *any_parameter; to just nvp = token >> '=' >> value;any_parameter = ';' >> (nvp | name_only);content_type_rule = type_subtype_rule >> qi::skip(spaces)[*any_parameter]; Note that any subrule invocations of rules that are declared without a skipper are implicitly lexeme (see "Boost spirit skipper issues"). There were many redundant/unused headers. Recent compilers + Boost versions make BOOST_FUSION_ADAPT_STRUCT much simpler by using decltype. The results of simplifying are much less noisy: //#define BOOST_SPIRIT_DEBUG#include <boost/spirit/include/qi.hpp>#include <boost/fusion/include/adapted.hpp>struct parameter { boost::optional<std::string> name; std::string value;};struct type_subtype { std::string type; std::string subtype;};struct content_type { type_subtype type; std::vector<parameter> params;};BOOST_FUSION_ADAPT_STRUCT(type_subtype, type, subtype)BOOST_FUSION_ADAPT_STRUCT(content_type, type, params)template<class Iterator>struct token_grammar : qi::grammar<Iterator, content_type()>{ token_grammar() : token_grammar::base_type(content_type_rule) { using qi::ascii::char_; spaces = char_(' '); token = +~char_( "()<>@,;:\\\"/[]?={} \t"); quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"'; value = quoted_string | token; type_subtype_rule = token >> '/' >> token; name_only = token; nvp = token >> '=' >> value; any_parameter = ';' >> (nvp | name_only); content_type_rule = type_subtype_rule >> qi::skip(spaces) [*any_parameter]; BOOST_SPIRIT_DEBUG_NODES((nvp)(any_parameter)(content_type_rule)(quoted_string)(token)(value)(type_subtype_rule)) } private: using Skipper = qi::space_type; Skipper spaces; qi::rule<Iterator, binary_parameter(), Skipper> nvp; qi::rule<Iterator, parameter(), Skipper> any_parameter; qi::rule<Iterator, content_type()> content_type_rule; // lexemes 
qi::rule<Iterator, std::string()> quoted_string, token, value; qi::rule<Iterator, type_subtype()> type_subtype_rule; qi::rule<Iterator, unary_parameter()> name_only;}; See it Live On Coliru (with the same test cases). BONUS: I'd prefer a simpler AST in a case like this. By injecting some attribute values using qi::attr you can avoid using boost::variant and/or even avoid boost::optional: struct parameter { bool have_name; std::string name; std::string value;};struct type_subtype { std::string type; std::string subtype;};struct content_type { type_subtype type; std::vector<parameter> params;};BOOST_FUSION_ADAPT_STRUCT(parameter, have_name, name, value)BOOST_FUSION_ADAPT_STRUCT(type_subtype, type, subtype)BOOST_FUSION_ADAPT_STRUCT(content_type, type, params)namespace qi = boost::spirit::qi;template<class Iterator>struct token_grammar : qi::grammar<Iterator, content_type()>{ token_grammar() : token_grammar::base_type(content_type_rule) { using qi::ascii::char_; spaces = char_(' '); token = +~char_( "()<>@,;:\\\"/[]?={} \t"); quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"'; value = quoted_string | token; type_subtype_rule = token >> '/' >> token; name_only = qi::attr(false) >> qi::attr("") >> token; nvp = qi::attr(true) >> token >> '=' >> value; any_parameter = ';' >> (nvp | name_only); content_type_rule = type_subtype_rule >> qi::skip(spaces) [*any_parameter]; BOOST_SPIRIT_DEBUG_NODES((nvp)(any_parameter)(content_type_rule)(quoted_string)(token)(value)(type_subtype_rule)) } private: using Skipper = qi::space_type; Skipper spaces; qi::rule<Iterator, parameter(), Skipper> nvp, name_only, any_parameter; qi::rule<Iterator, content_type()> content_type_rule; // lexemes qi::rule<Iterator, std::string()> quoted_string, token, value; qi::rule<Iterator, type_subtype()> type_subtype_rule;}; 这篇关于使用 Boost.Spirit 完整解码 HTTP 标头值的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！