参考:http://www.cnblogs.com/yanlingyin/archive/2012/04/17/2451717.html
实现了一个简单的java词法分析器
功能:词法分析下面一段java小程序
int sum = 0; for(int i = 1; i <= 100; i = i + 1) sum += i;#
1.程序片段中使用到的关键字、运算符和界符:
关键词:
int for
运算符:
+ = <=
界符:
( ) ; #
2.单词和单词种别码设计
单词符号 | 种别码 |
int | 1 |
for | 2 |
letter(letter|digit)* | 3 |
digit(digit)* | 4 |
* | 5 |
/ | 6 |
+ | 7 |
- | 8 |
> | 9 |
>= | 10 |
< | 11 |
<= | 12 |
= | 13 |
; | 14 |
( | 15 |
) | 16 |
# | 17 |
词法分析器源代码
package com.gxf.lexical; import java.util.Scanner; public class Lexical {
String input = ""; //源程序
String keyWords[] = {"int", "for"}; //关键词
int point = 0; //全局指针指向源程序
int syn = 0; //单词种别码
int sum = 0; //数字的总和
StringBuffer token = new StringBuffer(""); //存放单词 public static void main(String[] args) {
Scanner scanner = new Scanner(System.in);
Lexical lexical = new Lexical(); lexical.input = scanner.nextLine();
// System.out.println(lexical.input);//输入源程序
scanner.close(); do{
lexical.scanner();
switch(lexical.syn){
case 4:
System.out.println("(" + lexical.syn + "," + lexical.sum + ")");
break;
default:
System.out.println("(" + lexical.syn + "," + lexical.token + ")");
break;
}
}while(lexical.syn != 17);
} /**
* 词法分析器
*/
public void scanner(){
//将单词置为空
token = new StringBuffer();
while(' ' == input.charAt(point))
point++; //去掉空格
if((input.charAt(point) >= 'a' && input.charAt(point) <= 'z') ||
(input.charAt(point) >= 'A' && input.charAt(point) <= 'Z')){//关键词或者标识符
syn = 3;//种别码为3
while((input.charAt(point) >= 'a' && input.charAt(point) <= 'z') ||
(input.charAt(point) >= 'A' && input.charAt(point) <= 'Z') ||
(input.charAt(point) >= '0' && input.charAt(point) <= '9')){
token.append(input.charAt(point));
point++;
}
// point--;//后退一个位置
for(int i = 0; i < keyWords.length; i++){
if(keyWords[i].equals(String.valueOf(token))){
syn = i + 1;//修改种别码
break;
}
}
}//if
else if(input.charAt(point) >= '0' && input.charAt(point) <= '9'){//如果是数字
syn = 4;
sum = 0;
while(input.charAt(point) >= '0' && input.charAt(point) <= '9'){
sum = sum * 10 + (input.charAt(point) - '0');
point++;
}
//point--;//后退一个字符
}//else if
else{//其他字符
switch(input.charAt(point)){
case '>'://大于符号
token = new StringBuffer(">");//重置token
point++;
if(input.charAt(point) == '='){
token.append("=");
syn = 10;
}else{
syn = 9;
point--;
}
point++;
break;
case '<':
token = new StringBuffer("<");
point++;
if(input.charAt(point) == '='){
token.append("=");
syn = 12;
}else{
syn = 11;
point--;
}
point++;
break;
case '*':
token = new StringBuffer("*");
syn = 5;
point++;
break;
case '/':
token = new StringBuffer("/");
syn = 6;
point++;
break;
case '+':
token = new StringBuffer("+");
syn = 7;
point++;
break;
case '-':
token = new StringBuffer("-");
syn = 8;
point++;
break;
case ';':
token = new StringBuffer(";");
syn = 14;
point++;
break;
case '(':
token = new StringBuffer("(");
syn = 15;
point++;
break;
case ')':
token = new StringBuffer(")");
syn = 16;
point++;
break;
case '#':
token = new StringBuffer("#");
syn = 17;
point++;
break;
case '=':
token = new StringBuffer("=");
syn = 13;
point++;
break;
} }
}
}
注意程序片段要以#号结束
其实,上面参考的博客写得还不错可以看看