I'm writing a lexer for the JACK language as part of a compiler I'm working on, and I keep getting a segmentation fault when building my lexeme list. I have a variable that is a pointer to the tokens storing the lexeme list. It is passed to two different functions, both of which allocate memory for it. This question is an update to my previous question here and includes all of the code.
main.c
#include <stdio.h>
#include <stdlib.h>
#include "jlex.h"
int main(int argc, char * argv[])
{
    FILE * sourceFile;
    int lexerStatus;
    token ** tokenList = NULL;
    printf("Attempting to open file...\n");
    if(argc > 1) {
        if(!(sourceFile = fopen(argv[1], "r"))) {
            fprintf(stderr, "Error: Could not open file \'%s\'!\n", argv[1]);
            return FILE_ERROR;
        }
    } else {
        fprintf(stderr, "Error: No input file given!\n");
        return FILE_ERROR;
    }
    printf("Success!\nLexing input file...\n");
    if((lexerStatus = lexer(&tokenList, sourceFile)) != EXEC_SUCCESS) {
        fprintf(stderr, "Error: Failed to lex source file! (%d)\n", lexerStatus);
        return lexerStatus;
    }
    fclose(sourceFile);
    printf("Lexing complete!\n");
    printf("Token Name\tToken Type\tLine Number\n");
    for(token * currToken = tokenList[0]; currToken->type != terminator; currToken++) {
        if(currToken->type == integer || currToken->type == keyword || currToken->type == identifier)
            printf("%s", currToken->string);
        else
            putchar(currToken->character);
        printf("\t\t%d\t\t%d\n", currToken->type, currToken->lineNum);
    }
    return EXEC_SUCCESS;
}
jlex.h
#ifndef JLEX_H
#define JLEX_H
#include <stdio.h> /* Required for FILE data type */
#define EXEC_SUCCESS 0
#define FILE_ERROR 1
#define MEM_ERROR 2
#define LEX_ERROR 3
#define DEFAULT_LIST_SIZE 1024
typedef enum tokenTypes { keyword, identifier, operator, string, integer, punctuator, terminator } tokenName;
typedef struct token {
    union {
        char * string;
        int character;
    };
    tokenName type;
    int lineNum;
} token;
extern const char * const keywords[];
extern const char * const operators;
extern const char * const punctuators;
int addTokenToList(token * nextToken, token *** tokenList);
int getNextToken(token * nextToken, FILE * sourceFile);
int lexer(token *** tokenList, FILE * sourceFile);
#endif
jlex.c
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "jlex.h"
const char * const tokenTypeNames[] = { "keyword", "identifier", "operator", "string", "integer", "punctuator", "terminator" };
const char * const keywords[] = { "boolean",
                                  "char",
                                  "class",
                                  "constructor",
                                  "do",
                                  "else",
                                  "false",
                                  "field",
                                  "function",
                                  "if",
                                  "int",
                                  "method",
                                  "null",
                                  "return",
                                  "static",
                                  "true",
                                  "this"
                                  "var",
                                  "void",
                                  "while" };
const char * const operators = "+-*/&|~<>+=";
const char * const punctuators = "({[)}],.;";
static inline bool isoperator(int c)
{
    for(unsigned int i = 0; i < strlen(operators); i++)
        if(c == operators[i])
            return true;
    return false;
}
static inline bool ispunctuator(int c)
{
    for(unsigned int i = 0; i < strlen(punctuators); i++)
        if(c == punctuators[i])
            return true;
    return false;
}
static inline bool iskeyword(char * string)
{
    for(unsigned int i = 0; i < sizeof(keywords) / sizeof(char*); i++)
        if(!strcmp(keywords[i], string))
            return true;
    return false;
}
int getNextToken(token * nextToken, FILE * sourceFile)
{
    /*
     * Skip all whitespace and comments
     * From first token try and determine token type (possible if an operator, punctuator, terminator, integer, or terminator)
     * If token is determined then build it and return lexer status
     * If not then keep reading until a full token can be constructed
     * Change chosen delimiters based on what kind of token we think we're reading
     * Return lexer status (might fail if invalid lexeme is detected, i.e. a number followed by letters)
     */
    int c;
    static int lineNum = 1;
    do {
        c = fgetc(sourceFile);
        if(c == '\n')
            lineNum++;
    } while((c == '\n') || (c == '\t') || (c == ' '));
    nextToken->lineNum = lineNum;
    nextToken->character = c;
    if(c == EOF) {
        nextToken->type = terminator;
        return EXEC_SUCCESS;
    }
    if(isoperator(c)) {
        nextToken->type = operator;
        return EXEC_SUCCESS;
    }
    if(ispunctuator(c)) {
        nextToken->type = punctuator;
        return EXEC_SUCCESS;
    }
    /* If we get to this point then c is not a single character lexeme so we need to allocate some space for it in the token */
    if(!(nextToken->string = malloc(1024 * sizeof(char))))
        return MEM_ERROR;
    int pos = 0;
    if(isdigit(c)) {
        do {
            nextToken->string[pos++] = c;
            c = fgetc(sourceFile);
        } while(isdigit(c) && pos < 1023);
        nextToken->string[pos] = '\0';
        if(!isoperator(c) && !ispunctuator(c) && !isspace(c))
            return LEX_ERROR;
        nextToken->type = integer;
        return EXEC_SUCCESS;
    }
    /* If we get to this point then we have to be reading an identifier or a keyword */
    do {
        nextToken->string[pos++] = c;
        c = fgetc(sourceFile);
    } while((isalpha(c) || isdigit(c) || c == '_') && pos < 1023);
    nextToken->string[pos] = '\0';
    if(iskeyword(nextToken->string))
        nextToken->type = keyword;
    else
        nextToken->type = identifier;
    return EXEC_SUCCESS;
}
int addTokenToList(token * nextToken, token *** tokenList)
{
    static unsigned int listSize = DEFAULT_LIST_SIZE;
    static unsigned int tokenNum = 0;
    if(listSize <= tokenNum) {
        listSize *= 2;
        if(!(*tokenList = realloc(*tokenList, listSize * sizeof(token *)))) /* If the list isn't large enough then double its size */
            return MEM_ERROR;
    }
    if(!(tokenList[tokenNum] = malloc(sizeof(token)))) /* Allocate memory for the data we are about to copy */
        return MEM_ERROR;
    memcpy(tokenList[tokenNum++], nextToken, sizeof(token)); /* Copy token into the array */
    if(nextToken->type == terminator)
        if(!(*tokenList = realloc(*tokenList, tokenNum * sizeof(token *)))) /* After EOF we know what the final size of the list is so resize it appropriately */
            return MEM_ERROR;
    return EXEC_SUCCESS;
}
int lexer(token *** tokenList, FILE * sourceFile)
{
    int status;
    token nextToken;
    if(!(*tokenList = malloc(DEFAULT_LIST_SIZE * sizeof(token *))))
        return MEM_ERROR;
    do {
        status = getNextToken(&nextToken, sourceFile);
        if(addTokenToList(&nextToken, tokenList) != EXEC_SUCCESS)
            status = MEM_ERROR;
    } while(nextToken.type != terminator && status == EXEC_SUCCESS);
    return status;
}
Test file:
jackExample.jack
class Main {
    function void main () {
        var Array a;
        var int length;
        var int i, sum;

        let length = Keyboard.readInt();
        let a = Array.new(length);
        let i = 0;

        while (i < length) {
            let a[i] = Keyboard.readInt();
            let sum = sum + a[i];
            let i= i+1;
        }

        do Output.printString();
        do Output.printInt(sum / length);
        do Output.println();
        return;
    }
}
When run on this source file, the program produces the following output:
Attempting to open file...
Success!
Lexing input file...
Lexing complete!
Token Name Token Type Line Number
class 0 1
1041 0
Segmentation fault
The Valgrind output reports several errors associated with the malloc() call in the addTokenToList() function. Note: the JACK source file above is not valid JACK, but rather a version that the lexer in its current state should be able to handle; it cannot yet handle string literals or comments.
Accepted answer
You are missing two dereferences of tokenList in addTokenToList:
if(!((*tokenList)[tokenNum] = malloc(sizeof(token)))) /* Allocate memory for the data we are about to copy */
memcpy((*tokenList)[tokenNum++], nextToken, sizeof(token)); /* Copy token into the array */
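For reference, here is a sketch of addTokenToList with those two dereferences applied; everything else is unchanged from the code posted above. Without the explicit (*tokenList), the expression indexes the caller's pointer variable itself rather than the array of token pointers it points to, writing outside valid memory.
int addTokenToList(token * nextToken, token *** tokenList)
{
    static unsigned int listSize = DEFAULT_LIST_SIZE;
    static unsigned int tokenNum = 0;
    if(listSize <= tokenNum) {
        listSize *= 2;
        if(!(*tokenList = realloc(*tokenList, listSize * sizeof(token *)))) /* Double the list size when it fills up */
            return MEM_ERROR;
    }
    if(!((*tokenList)[tokenNum] = malloc(sizeof(token)))) /* Index the array *tokenList points to, not tokenList itself */
        return MEM_ERROR;
    memcpy((*tokenList)[tokenNum++], nextToken, sizeof(token)); /* Copy the token into the newly allocated slot */
    if(nextToken->type == terminator)
        if(!(*tokenList = realloc(*tokenList, tokenNum * sizeof(token *)))) /* Shrink to the final size at EOF */
            return MEM_ERROR;
    return EXEC_SUCCESS;
}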
The loop that prints the tokens in main is also wrong: the list is an array of pointers to individually malloc'd tokens, not a contiguous array of tokens, so you cannot advance with currToken++. Index the pointer array instead, for instance:
int i = 0;
for(token * currToken = tokenList[i]; currToken->type != terminator; currToken = tokenList[++i]) {
Now the program runs without any errors, apart from the memory leaks:
pi@raspberrypi:/tmp $ valgrind ./a.out jackExample.jack
==17597== Memcheck, a memory error detector
==17597== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==17597== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==17597== Command: ./a.out jackExample.jack
==17597==
Attempting to open file...
Success!
Lexing input file...
Lexing complete!
Token Name Token Type Line Number
class 0 1
Main 1 1
{ 5 1
function 0 2
void 0 2
main 1 2
( 5 2
) 5 2
{ 5 2
var 1 3
Array 1 3
a 1 3
var 1 4
int 0 4
length 1 4
var 1 5
int 0 5
i 1 5
sum 1 5
let 1 7
length 1 7
= 2 7
Keyboard 1 7
readInt 1 7
) 5 7
; 5 7
let 1 8
a 1 8
= 2 8
Array 1 8
new 1 8
length 1 8
; 5 8
let 1 9
i 1 9
= 2 9
0 4 9
while 0 11
( 5 11
i 1 11
< 2 11
length 1 11
{ 5 11
let 1 12
a 1 12
i 1 12
= 2 12
Keyboard 1 12
readInt 1 12
) 5 12
; 5 12
let 1 13
sum 1 13
= 2 13
sum 1 13
+ 2 13
a 1 13
i 1 13
; 5 13
let 1 14
i 1 14
i 1 14
1 4 14
} 5 15
do 0 17
Output 1 17
printString 1 17
) 5 17
; 5 17
do 0 18
Output 1 18
printInt 1 18
sum 1 18
/ 2 18
length 1 18
; 5 18
do 0 19
Output 1 19
println 1 19
) 5 19
; 5 19
return 0 20
} 5 21
} 5 22
==17597==
==17597== HEAP SUMMARY:
==17597== in use at exit: 58,704 bytes in 142 blocks
==17597== total heap usage: 147 allocs, 5 frees, 88,496 bytes allocated
==17597==
==17597== LEAK SUMMARY:
==17597== definitely lost: 340 bytes in 1 blocks
==17597== indirectly lost: 58,364 bytes in 141 blocks
==17597== possibly lost: 0 bytes in 0 blocks
==17597== still reachable: 0 bytes in 0 blocks
==17597== suppressed: 0 bytes in 0 blocks
==17597== Rerun with --leak-check=full to see details of leaked memory
==17597==
==17597== For counts of detected and suppressed errors, rerun with: -v
==17597== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 3)
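The leaks Valgrind reports come from the token list never being freed: every token is malloc'd, and keyword, identifier, and integer tokens also own a 1024-byte string buffer. A minimal cleanup sketch (assuming the corrected indexing above) that could run at the end of main before returning:
/* Free each token, any string buffer it owns, and finally the pointer array itself */
for(int i = 0; ; i++) {
    token * currToken = tokenList[i];
    tokenName type = currToken->type;
    if(type == integer || type == keyword || type == identifier)
        free(currToken->string);    /* Only these token types allocate a string */
    free(currToken);
    if(type == terminator)          /* The terminator token is always the last entry */
        break;
}
free(tokenList);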
The original question, "c - Segmentation fault in lexer for the JACK language", can be found on Stack Overflow: https://stackoverflow.com/questions/54610111/