参考:http://www.cnblogs.com/yanlingyin/archive/2012/04/17/2451717.html

实现了一个简单的java词法分析器

功能:词法分析下面一段java小程序

 int sum = 0; for(int i = 1; i <= 100; i = i + 1) sum += i;#

1.程序片段中使用到的关键字、运算符和界符:

关键词:

int for

运算符:

+ = <=

界符:

( ) ; #

2.单词和单词种别码设计

单词符号

种别码

int

1

for

2

letter(letter|digit)*

3

digit(digit)*

4

*

5

/

6

+

7

-

8

>

9

>=

10

<

11

<=

12

=

13

;

14

(

15

)

16

#

17

词法分析器源代码

 package com.gxf.lexical;

 import java.util.Scanner;

 /**
  * A minimal hand-written lexical analyzer for a tiny subset of Java.
  *
  * <p>Each call to {@link #scanner()} consumes one token from {@link #input}, starting at
  * {@link #point}, and publishes the result through the {@link #syn}, {@link #token} and
  * {@link #sum} fields.
  *
  * <p>Token codes stored in {@code syn}:
  * <pre>
  *   1  int        2  for        3  identifier   4  unsigned integer
  *   5  *          6  /          7  +            8  -
  *   9  >          10 >=         11 <            12 <=
  *   13 =          14 ;          15 (            16 )
  *   17 # (end marker)           -1 unrecognised character
  * </pre>
  */
 public class Lexical {
     /** Token code of the '#' end marker; also reported when the input is exhausted. */
     private static final int SYN_END = 17;

     /** Token code reported for a character that starts no known token. */
     private static final int SYN_UNKNOWN = -1;

     /** Source text being scanned. */
     String input = "";

     /** Reserved words; the keyword at index i has token code i + 1. */
     String[] keyWords = {"int", "for"};

     /** Index into {@link #input} of the next character to be examined. */
     int point = 0;

     /** Token code of the most recently scanned token. */
     int syn = 0;

     /** Value of the most recently scanned integer literal (meaningful when syn == 4). */
     int sum = 0;

     /** Text of the most recently scanned token; left empty for integer literals. */
     StringBuffer token = new StringBuffer("");

     /**
      * Reads one line of source from standard input and prints every token as
      * {@code (code,value)} until the end marker (code 17) has been printed.
      *
      * @param args unused
      */
     public static void main(String[] args) {
         Lexical lexical = new Lexical();
         Scanner scanner = new Scanner(System.in);
         // Guard against an empty stdin; the lexer then reports the end marker immediately.
         if (scanner.hasNextLine()) {
             lexical.input = scanner.nextLine();
         }
         scanner.close();

         do {
             lexical.scanner();
             switch (lexical.syn) {
                 case 4:
                     // Integer literals are reported by value rather than by text.
                     System.out.println("(" + lexical.syn + "," + lexical.sum + ")");
                     break;
                 default:
                     System.out.println("(" + lexical.syn + "," + lexical.token + ")");
                     break;
             }
         } while (lexical.syn != SYN_END);
     }

     /**
      * Scans the next token from {@link #input}.
      *
      * <p>Leading whitespace is skipped. When the input is exhausted the end marker is
      * reported (syn == 17, token "#") instead of reading past the end of the string, so
      * a source line without a trailing '#' still terminates. A character that starts no
      * known token is consumed and reported with syn == -1 so that callers looping on
      * this method always make progress.
      */
     public void scanner() {
         // Start every token with an empty buffer.
         token = new StringBuffer();

         while (point < input.length() && Character.isWhitespace(input.charAt(point))) {
             point++;
         }

         if (point >= input.length()) {
             // Implicit end marker: nothing left to read.
             token.append('#');
             syn = SYN_END;
             return;
         }

         char current = input.charAt(point);
         if (isAsciiLetter(current)) {
             scanWord();
         } else if (isAsciiDigit(current)) {
             scanNumber();
         } else {
             scanSymbol(current);
         }
     }

     /** Scans letter(letter|digit)* and classifies it as a keyword or an identifier. */
     private void scanWord() {
         while (point < input.length()
                 && (isAsciiLetter(input.charAt(point)) || isAsciiDigit(input.charAt(point)))) {
             token.append(input.charAt(point));
             point++;
         }

         syn = 3; // identifier unless the text matches a keyword below
         String word = token.toString();
         for (int i = 0; i < keyWords.length; i++) {
             if (keyWords[i].equals(word)) {
                 syn = i + 1;
                 break;
             }
         }
     }

     /** Scans digit(digit)* and accumulates its decimal value into {@link #sum}. */
     private void scanNumber() {
         syn = 4;
         sum = 0;
         while (point < input.length() && isAsciiDigit(input.charAt(point))) {
             // NOTE: int arithmetic wraps silently for literals beyond Integer.MAX_VALUE.
             sum = sum * 10 + (input.charAt(point) - '0');
             point++;
         }
     }

     /** Consumes the operator or delimiter starting with {@code symbol} and sets its code. */
     private void scanSymbol(char symbol) {
         token.append(symbol);
         point++;
         switch (symbol) {
             case '>':
                 syn = acceptEquals() ? 10 : 9;
                 break;
             case '<':
                 syn = acceptEquals() ? 12 : 11;
                 break;
             case '*':
                 syn = 5;
                 break;
             case '/':
                 syn = 6;
                 break;
             case '+':
                 syn = 7;
                 break;
             case '-':
                 syn = 8;
                 break;
             case '=':
                 syn = 13;
                 break;
             case ';':
                 syn = 14;
                 break;
             case '(':
                 syn = 15;
                 break;
             case ')':
                 syn = 16;
                 break;
             case '#':
                 syn = SYN_END;
                 break;
             default:
                 // The character has already been consumed, so the caller cannot spin on it.
                 syn = SYN_UNKNOWN;
                 break;
         }
     }

     /**
      * Consumes a following '=' if one is present, appending it to {@link #token}.
      *
      * @return true when an '=' was consumed
      */
     private boolean acceptEquals() {
         if (point < input.length() && input.charAt(point) == '=') {
             token.append('=');
             point++;
             return true;
         }
         return false;
     }

     /** Returns true for 'a'..'z' and 'A'..'Z' only. */
     private static boolean isAsciiLetter(char c) {
         return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
     }

     /** Returns true for '0'..'9' only. */
     private static boolean isAsciiDigit(char c) {
         return c >= '0' && c <= '9';
     }
 }

java 词法分析器-LMLPHP

注意程序片段要以#号结束

其实,上面参考的博客写得还不错,可以看看。

05-08 14:57