我有一个 lex 文件、一个 yacc 文件和一个 main.cpp 文件。
我的 main.cpp 如下所示:
// Parser driver: with no command-line arguments it parses standard input;
// otherwise it parses every file named on the command line in turn.
// NOTE(review): the closing brace of main is not part of this excerpt.
int main(int argc, char **argv)
{
if (argc == 1)
{ int token;
// `token` is declared but never used in this version.
// Set the current file name to "<stdin>" and point the scanner at standard input.
curr_filename = "<stdin>";
yyin = stdin;
yyparse();
}
else
{
for (int i = 1; i < argc; ++i)
{
// Record the file name, then hand the opened file to the scanner via yyin.
curr_filename = argv[i];
yyin = std::fopen(argv[i], "r");
if (yyin)
{
yyparse();
std::fclose(yyin);
}
else
{
// The file could not be opened: report it and continue with the next argument.
utility::print_error(argv[i], "cannot be opened");
}
}
}
// Stop with exit status 1 if the parser counted any errors.
// NOTE(review): bison's generated yyparse normally resets yynerrs on entry, so
// after several files this presumably reflects only the last parse -- confirm.
if (yynerrs > 0)
{
std::cerr << "Compilation halted due to lexical or syntax errors.\n";
exit(1);
}
这样可以完成解析。但是现在我还想打印由 lex 文件生成的 token,所以我做了如下修改,在 main 中直接调用 yylex():
// Modified driver from the question: tries to print each token by calling
// yylex() directly before handing control to yyparse(). Only the stdin
// branch is shown; the remainder was elided by the original author.
int main(int argc, char **argv)
{
if (argc == 1)
{ int token;
curr_filename = "<stdin>";
yyin = stdin;
// calling yylex to get token
// The loop ends as soon as yylex() returns 0.
while(token= yylex())
{
switch(token){
// 258 is the number given to CLASS by the %token declarations in the grammar.
case 258 :
std::cout << "class" ;
// NOTE(review): there is no break above, so the "class" case falls through
// and prints "token " as well.
default :
std::cout << "token " ;
}
// yyparse() runs inside the loop, after one token has already been consumed
// here; the parser calls yylex() itself, so that token never reaches it.
yyparse();
}
//rest of the code same
但没有输出任何内容。
如何才能把 token 打印到标准输出或文件中?希望能得到帮助。
flex 文件:
%option noyywrap
%option yylineno
%{
/* Scanner prologue: project headers, token definitions and scanner-wide state. */
#include "flexbison.hpp"
#include "tokentable.hpp"
#include "symboltable.hpp"
#include "y.tab.h"
#include <stdio.h>
/* flex runs YY_USER_ACTION before the action of every matched rule; here it
   stamps the token location with the current line number. */
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
static const int MAX_STR_CONST = 1025;
char string_buf[MAX_STR_CONST]; // buffer that accumulates the string constant currently being scanned
char *string_buf_ptr; // next write position inside string_buf
int num_comment = 0; // nesting depth of the "(*" comments that are currently open
std::size_t curr_lineno = 0; // line counter incremented by the newline rules
bool str_too_long = false; // set when a string constant overflows string_buf; read by the <STRING>\0 rules
%}
/* Exclusive start conditions for block comments, line comments and string constants. */
%x COMMENT
%x LINECOMMENT
%x STRING
/* Named pattern for "=>"; the rules shown below match the literal "=>" directly instead. */
DARROW =>
%%
"(*" {
// Block comment opened from normal scanning: enter COMMENT and record one nesting level.
BEGIN(COMMENT);
num_comment++;
}
"*)" {
// A "*)" seen outside of any comment is reported as a lexical error.
if (num_comment <= 0) {
yylval.error_msg = "Unmatched *)";
return ERROR;
}
}
<COMMENT>"*)" {
// Close one nesting level; resume normal scanning once the outermost comment is closed.
num_comment--;
if (num_comment < 0) {
yylval.error_msg = "Unmatched *)";
return ERROR;
}
if (num_comment == 0) {
BEGIN(INITIAL);
}
}
<COMMENT>"(*" {
// Nested comment opener: one more level has to be closed.
num_comment++;
}
<COMMENT>[^\n] {
// eat everything within comments
}
<COMMENT>\n {
// Newlines inside comments still advance the line counter.
++curr_lineno;
}
"--"[^\n]* {
// The pattern itself consumes the rest of the line after "--";
// LINECOMMENT then only waits for the terminating newline.
BEGIN(LINECOMMENT);
}
<LINECOMMENT>\n {
// End of the line comment: count the line and resume normal scanning.
++curr_lineno;
BEGIN(INITIAL);
}
<COMMENT><<EOF>> {
// Input ended while a block comment was still open.
// NOTE(review): num_comment is not reset here -- confirm that is intended
// when several files are scanned in one run.
BEGIN(INITIAL);
yylval.error_msg = "EOF in comment";
return ERROR;
}
"=>" {
// Two-character arrow token.
return DARROW;
}
(?i:class) {
// Keywords are matched case-insensitively through (?i:...). They are listed
// before the identifier rules, so a keyword wins a tie of equal length.
return CLASS;
}
(?i:else) {
return ELSE;
}
(?i:in) {
return IN;
}
(?i:then) {
return THEN;
}
(?i:fi) {
return FI;
}
(?i:if) {
return IF;
}
(?i:inherits) {
return INHERITS;
}
(?i:let) {
return LET;
}
(?i:loop) {
return LOOP;
}
(?i:pool) {
return POOL;
}
(?i:while) {
return WHILE;
}
(?i:case) {
return CASE;
}
(?i:esac) {
return ESAC;
}
(?i:of) {
return OF;
}
(?i:new) {
return NEW;
}
(?i:isvoid) {
return ISVOID;
}
(?i:not) {
return NOT;
}
t(?i:rue) {
// Boolean literal: the first letter must be lowercase, the rest is case-insensitive.
yylval.boolean = true;
return BOOL_CONST;
}
f(?i:alse) {
yylval.boolean = false;
return BOOL_CONST;
}
[0-9]+ {
// Integer literal: the digit text is added to inttable() and the result is passed to the parser.
yylval.symbol = inttable().add(yytext);
return INT_CONST;
}
"<=" {
return LE;
}
"<-" {
return ASSIGN;
}
[A-Z][a-zA-Z0-9_]* {
// Identifiers starting with an uppercase letter are type names.
yylval.symbol = idtable().add(yytext);
return TYPEID;
}
[a-z][a-zA-Z0-9_]* {
// Identifiers starting with a lowercase letter are object names.
yylval.symbol = idtable().add(yytext);
return OBJECTID;
}
";"|","|"{"|"}"|":"|"("|")"|"+"|"-"|"*"|"/"|"="|"~"|"<"|"."|"@" {
// Single-character tokens are returned as their own character code.
return *yytext;
}
\n {
// Newline outside comments and strings: only the line counter changes.
++curr_lineno;
}
[ \f\r\t\v] {
// eat whitespace
}
/*
* String constants (C syntax)
* Escape sequence \c is accepted for all characters c. Except for
* \n \t \b \f, the result is c.
*
*/
\" {
// Opening quote: switch to STRING and start with an empty, zero-filled buffer.
BEGIN(STRING);
string_buf_ptr = string_buf;
memset(string_buf, 0, MAX_STR_CONST);
}
<STRING>\" {
// Closing quote: the accumulated text becomes a string constant.
BEGIN(INITIAL);
yylval.symbol = stringtable().add(string_buf);
return STR_CONST;
}
<STRING>\0[^\n]*\" {
// A NUL followed by the rest of the string up to a closing quote. The NUL is
// either part of the input or was pushed back by the overflow handling below
// (unput('\0')); str_too_long tells the two cases apart.
BEGIN(INITIAL);
if (str_too_long) {
// The "too long" error has already been returned; just skip the remainder.
str_too_long = false;
}
else {
yylval.error_msg = "String contains null character";
return ERROR;
}
}
<STRING>\0[^"]*\n {
// A NUL followed by text that reaches a newline before any quote.
if (str_too_long) {
yyinput(); /* eat quote */
BEGIN(INITIAL);
str_too_long = false;
}
else {
// NOTE(review): the pattern ends in \n, so the last matched character
// appears to always be the newline and this test always true -- confirm.
if (yytext[yyleng - 1] != '\\') {
BEGIN(INITIAL);
yylval.error_msg = "String contains null character";
return ERROR;
}
}
}
<STRING><<EOF>> {
// Input ended before the closing quote.
BEGIN(INITIAL);
yylval.error_msg = "EOF in string constant";
return ERROR;
}
<STRING>\\ {
// Backslash escape. First make sure there is room for one more character;
// on overflow a NUL is pushed back so that the <STRING>\0 rules skip the rest.
if (strlen(string_buf) >= MAX_STR_CONST - 1) {
str_too_long = true;
unput('\0');
yylval.error_msg = "String constant too long";
return ERROR;
}
// Read the escaped character directly from the input and translate it.
char ahead = yyinput();
switch (ahead) {
case 'b':
*string_buf_ptr++ = '\b';
break;
case 't':
*string_buf_ptr++ = '\t';
break;
case 'n':
*string_buf_ptr++ = '\n';
break;
case 'f':
*string_buf_ptr++ = '\f';
break;
case '\n':
// Escaped newline: the string continues on the next line.
++curr_lineno;
*string_buf_ptr++ = '\n';
break;
case '\0':
// Put the NUL back so that the <STRING>\0 rules handle it.
unput(ahead);
break;
default:
// Any other escaped character stands for itself.
*string_buf_ptr++ = ahead;
}
}
<STRING>\n {
// Unescaped newline inside a string: report it and resume normal scanning.
++curr_lineno;
BEGIN(INITIAL);
yylval.error_msg = "Unterminated string constant";
return ERROR;
}
<STRING>. {
// Ordinary character: append it unless the buffer is already full.
if (strlen(string_buf) >= MAX_STR_CONST - 1) {
str_too_long = true;
unput('\0');
yylval.error_msg = "String constant too long";
return ERROR;
}
*string_buf_ptr++ = *yytext;
}
. /* error for invalid tokens */ {
// Fallback rule: a single character that none of the rules above accepted is
// reported as a lexical error, with the character included in the message.
yylval.error_msg = std::string(yytext) + " is not a valid character in the current context.";
return ERROR;
}
%%
bison 文件:
%{
/* Parser prologue: AST and table headers plus the externals shared with main.cpp and the scanner. */
#include "flexbison.hpp"
#include "symboltable.hpp"
#include "tokentable.hpp"
#include "ast.hpp"
#include <iostream>
// convenience macro for setting the location (first line, current file name) of each ast node
#define SETLOC(lval,node) (lval)->setloc((node).first_line, curr_filename)
// both defined in main.cpp
extern ProgramPtr ast_root;
extern std::string curr_filename;
// both defined in lexer
extern int yylex();
extern int yylineno;
// error reporting function, defined in the epilogue of this file
void yyerror(char *);
%}
/* Terminal symbols with explicitly assigned token numbers (258-283). */
%token CLASS 258 ELSE 259 FI 260 IF 261 IN 262
%token INHERITS 263 LET 264 LOOP 265 POOL 266 THEN 267 WHILE 268
%token CASE 269 ESAC 270 OF 271 DARROW 272 NEW 273 ISVOID 274
/* Tokens that carry a semantic value: the tag names the yylval field set by the scanner. */
%token <symbol> STR_CONST 275 INT_CONST 276
%token <boolean> BOOL_CONST 277
%token <symbol> TYPEID 278 OBJECTID 279
%token ASSIGN 280 NOT 281 LE 282 ERROR 283
/* Semantic value field used by each nonterminal. */
%type <program> program
%type <clazz> class
%type <classes> class_list
%type <attribute> attribute
%type <attributes> attribute_list
%type <method> method
%type <methods> method_list
%type <expression> expression
%type <expression> let_expr
%type <expressions> expression_list
%type <expressions> method_expr_list
%type <formal> formal
%type <formals> formal_list
%type <branch> case
%type <cases> case_list
/*
 * Operator precedence, listed from lowest to highest.
 *
 * LET is the lowest level so that a let body extends as far to the right as
 * possible (the let_expr rules use %prec LET). The three comparison operators
 * share one non-associative level that binds tighter than NOT and ASSIGN but
 * looser than the arithmetic operators, so "a + b < c" groups as
 * "(a + b) < c" and "not a = b" as "not (a = b)".
 */
%left LET
%right ASSIGN
%left NOT
%nonassoc LE '<' '='
%left '+' '-'
%left '*' '/'
%left ISVOID
%left '~'
%left '@'
%left '.'
%%
/* A program is a non-empty list of classes; the finished AST root is stored in ast_root. */
program : class_list { @$ = @1; ast_root = std::make_shared<Program>($1); }
;
/* NOTE(review): the list-extending alternatives in this file call $$.push_back() without
   assigning $$ first, so they rely on $$ starting out as a copy of $1 -- confirm that the
   parser skeleton in use guarantees this. */
class_list : class { $$ = Classes(); $$.push_back($1); }
| class_list class { $$.push_back($2); }
;
/* A class without an INHERITS clause gets "Object" as its parent. The error alternative
   resynchronises at the next ';' and does not assign $$. */
class : CLASS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, idtable().add("Object"), $4, $5); SETLOC($$, @1); }
| CLASS TYPEID INHERITS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, $4, $6, $7); SETLOC($$, @1); }
| error ';' { yyerrok; }
;
/* One or more attributes, each terminated by ';'. */
attribute_list : attribute ';' { $$ = Attributes(); $$.push_back($1); }
| attribute_list attribute ';' { $$.push_back($2); }
| error ';' { yyerrok; }
;
/* An attribute without an initialiser gets a NoExpr node in its place. */
attribute : OBJECTID ':' TYPEID { $$ = std::make_shared<Attribute>($1, $3, std::make_shared<NoExpr>()); SETLOC($$, @1); }
| OBJECTID ':' TYPEID ASSIGN expression { $$ = std::make_shared<Attribute>($1, $3, $5); SETLOC($$, @5); }
;
/* One or more methods, each terminated by ';'. */
method_list : method ';' { $$ = Methods(); $$.push_back($1); }
| method_list method ';' { $$.push_back($2); }
| error ';' { yyerrok; }
;
/* A method with or without formal parameters; an empty parameter list becomes Formals(). */
method : OBJECTID '(' formal_list ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $6, $3, $8); SETLOC($$, @1); }
| OBJECTID '(' ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $5, Formals(), $7); SETLOC($$, @1); }
;
/* Comma-separated formal parameters. */
formal_list : formal { $$ = Formals(); $$.push_back($1); }
| formal_list ',' formal { $$.push_back($3); }
;
formal : OBJECTID ':' TYPEID { $$ = std::make_shared<Formal>($1, $3); SETLOC($$, @1); }
;
/* One or more case branches; every branch ends with ';'. */
case_list : case { $$ = Cases(); $$.push_back($1); }
| case_list case { $$.push_back($2); }
;
case : OBJECTID ':' TYPEID DARROW expression ';' { $$ = std::make_shared<CaseBranch>($1, $3, $5); SETLOC($$, @5); }
;
/* Comma-separated argument list of a dispatch (at least one argument). */
method_expr_list : expression { $$ = Expressions(); $$.push_back($1); }
| method_expr_list ',' expression { $$.push_back($3); }
;
/* Body of a '{ ... }' block: one or more expressions, each followed by ';'. */
expression_list : expression ';' { $$ = Expressions(); $$.push_back($1); }
| expression_list expression ';' { $$.push_back($2); }
| error ';' { yyerrok; }
;
/* Bindings of a let: a comma-separated list is turned into nested Let nodes, one per
   binding. A binding without an initialiser gets a NoExpr node. %prec LET gives the
   alternatives that end in a body expression the precedence of LET. */
let_expr : OBJECTID ':' TYPEID IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
| OBJECTID ':' TYPEID ASSIGN expression IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @5); }
| OBJECTID ':' TYPEID ',' let_expr { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
| OBJECTID ':' TYPEID ASSIGN expression ',' let_expr { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @4); }
| error ',' let_expr { yyerrok; }
;
/* Expressions. The binary and unary operator alternatives are ambiguous on their own;
   the precedence declarations in the declarations section decide how they group.
   A dispatch without an explicit receiver is built as a dispatch on the object "self". */
expression : OBJECTID ASSIGN expression { $$ = std::make_shared<Assign>($1, $3); SETLOC($$, @3); }
| expression '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, $5); SETLOC($$, @1); }
| expression '.' OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, Expressions()); SETLOC($$, @1); }
| expression '@' TYPEID '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, $7); SETLOC($$, @1); }
| expression '@' TYPEID '.' OBJECTID '(' ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, Expressions()); SETLOC($$, @1);}
| OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, $3);
SETLOC($$, @1); }
| OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, Expressions());
SETLOC($$, @1); }
| IF expression THEN expression ELSE expression FI { $$ = std::make_shared<If>($2, $4, $6); SETLOC($$, @2); }
| WHILE expression LOOP expression POOL { $$ = std::make_shared<While>($2, $4); SETLOC($$, @2); }
| '{' expression_list '}' { $$ = std::make_shared<Block>($2); SETLOC($$, @2); }
| LET let_expr { $$ = $2; SETLOC($$, @2); }
| CASE expression OF case_list ESAC { $$ = std::make_shared<Case>($2, $4); SETLOC($$, @2); }
| NEW TYPEID { $$ = std::make_shared<New>($2); SETLOC($$, @2); }
| ISVOID expression { $$ = std::make_shared<IsVoid>($2); SETLOC($$, @2); }
| expression '+' expression { $$ = std::make_shared<Plus>($1, $3); SETLOC($$, @1); }
| expression '-' expression { $$ = std::make_shared<Sub>($1, $3); SETLOC($$, @1); }
| expression '*' expression { $$ = std::make_shared<Mul>($1, $3); SETLOC($$, @1); }
| expression '/' expression { $$ = std::make_shared<Div>($1, $3); SETLOC($$, @1); }
| '~' expression { $$ = std::make_shared<Complement>($2); SETLOC($$, @2); }
| expression '<' expression { $$ = std::make_shared<LessThan>($1, $3); SETLOC($$, @1); }
| expression LE expression { $$ = std::make_shared<LessThanEqualTo>($1, $3); SETLOC($$, @1); }
| expression '=' expression { $$ = std::make_shared<EqualTo>($1, $3); SETLOC($$, @1); }
| NOT expression { $$ = std::make_shared<Not>($2); SETLOC($$, @2); }
| '(' expression ')' { $$ = $2; SETLOC($$, @2); }
| OBJECTID { $$ = std::make_shared<Object>($1); SETLOC($$, @1); }
| INT_CONST { $$ = std::make_shared<IntConst>($1); SETLOC($$, @1); }
| STR_CONST { $$ = std::make_shared<StringConst>($1); SETLOC($$, @1); }
| BOOL_CONST { $$ = std::make_shared<BoolConst>($1); SETLOC($$, @1); }
;
%%
// Utility function converting a bison token number into its string
// representation, for better error reporting.
//
// token: a token number as returned by yylex() / stored in yychar.
// Returns the keyword or operator spelling for fixed tokens, "<KIND> = <value>"
// for tokens that carry a value (the value is read from the global yylval),
// "EOF" for the end-of-input token 0, and otherwise the token number
// converted to a single character (the single-character tokens are returned
// by the scanner as their own character code).
std::string convert_token(int token)
{
    std::string rep;
    switch (token)
    {
    case 0: rep = "EOF"; break; // end of input; would otherwise print as a NUL character
    case CLASS: rep = "class"; break;
    case ELSE: rep = "else"; break;
    case FI: rep = "fi"; break;
    case IF: rep = "if"; break;
    case IN: rep = "in"; break;
    case INHERITS: rep = "inherits"; break;
    case LET: rep = "let"; break;
    case LOOP: rep = "loop"; break;
    case POOL: rep = "pool"; break;
    case THEN: rep = "then"; break;
    case WHILE: rep = "while"; break;
    case CASE: rep = "case"; break;
    case ESAC: rep = "esac"; break;
    case OF: rep = "of"; break;
    case DARROW: rep = "=>"; break;
    case NEW: rep = "new"; break;
    case ISVOID: rep = "isvoid"; break;
    case ASSIGN: rep = "<-"; break;
    case NOT: rep = "not"; break;
    case LE: rep = "<="; break;
    case STR_CONST: rep = "STR_CONST = " + yylval.symbol.get_val(); break;
    case INT_CONST: rep = "INT_CONST = " + yylval.symbol.get_val(); break;
    // Adding a bool to a string literal is pointer arithmetic, not
    // concatenation, so the text is built from a std::string instead.
    case BOOL_CONST: rep = std::string("BOOL_CONST = ") + (yylval.boolean ? "true" : "false"); break;
    case TYPEID: rep = "TYPEID = " + yylval.symbol.get_val(); break;
    case OBJECTID: rep = "OBJECTID = " + yylval.symbol.get_val(); break;
    default: rep = static_cast<char>(token);
    }
    return rep;
}
void yyerror(char *)
{
if (yylval.error_msg.length() <= 0)
std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << "syntax error near or at character or token '" << convert_token(yychar) << "'\n";
else
std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << yylval.error_msg << "\n";
}
最佳答案
我没有通读所有这些代码,所以不确定您为什么看不到任何输出。如果您在 main 中调用 yylex,它会读取一个 token,而这个 token 实际上就被丢弃了。接着,当您调用 yyparse 时,yyparse 会自行反复调用 yylex,直到 yylex 返回 0。可以推测(但不能确定),下一次您在 main 的 while 循环中调用 yylex 时,它会再次返回 0,循环随即结束。结果应该是:while 循环打印出一个单词,随后是 yyparse 产生的输出(如果有的话);这些输出很可能是一条语法错误,因为 yyparse 永远看不到输入中的第一个 token。
我想这并不是您想要的做法,不过您的意图并不完全清楚。
如果想在 token 被词法分析器识别出来时查看它们,可以在每个词法动作中插入打印 token 的语句;或者让 flex 把生成的扫描函数命名为其他名字(例如 yylex_internal),再自己编写一个 yylex() 函数:它调用 yylex_internal,并在返回结果之前把结果打印出来。
如果您只是出于调试目的才需要这个功能,那么最好使用 flex 的 -d 命令行选项,它会自动生成调试输出。输出格式可能不完全符合您的需要,但启用和撤销都要容易得多 :)
要更改 flex 生成的 yylex 函数的名称,请在 .l 文件顶部的代码块中插入以下内容:
#define YY_DECL int yylex_internal()
flex 生成的文件按如下方式声明扫描函数:
YY_DECL {
/* body of function */
}
因此,您可以通过定义 YY_DECL 宏来重命名该函数、为其添加参数,甚至更改其返回类型。请参见 flex 手册的 Generated Scanner 一节。顺便说一句,尽管 bison 允许您手动为所有终结符 token 指定编号,但这通常不被认为是好的做法。您应该让 bison 自行为 token 编号,然后在源文件中通过 #include "y.tab.h" 引入这些定义(或者您为 bison 头文件起的其他名字;使用 -o 选项可以很容易地更改该名称)。
关于c++ - 使用lex和yacc打印 token ,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/22543130/