一、统计字母的频率,并按照由大到小的频率输出
package org.yuan.HelloWorld;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

/**
 * Counts how often each of the 26 English letters occurs in a text file, ignoring case,
 * and prints every letter with its percentage share, most frequent first.
 */
public class TestFile1 {

    /** Text file that is analysed. */
    private static final String INPUT_FILE = "d:/Harry Potter and the Sorcerer's Stone.txt";

    /** Charset used to decode {@link #INPUT_FILE}. */
    private static final String INPUT_CHARSET = "gbk";

    /** Number of letters in the English alphabet. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Reads the input file, tallies its letters and prints the frequency table.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        final int[] counts = countLetters(INPUT_FILE);

        // Sort letter indices instead of the counts themselves so each letter stays paired
        // with its count. Arrays.sort on objects is stable, so letters with equal counts
        // stay in alphabetical order.
        Integer[] order = new Integer[ALPHABET_SIZE];
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            order[i] = i;
        }
        Arrays.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Integer.compare(counts[right], counts[left]);
            }
        });

        // Integral total: a double would print as "12345.0" or in scientific notation.
        long total = 0;
        for (int count : counts) {
            total += count;
        }

        DecimalFormat percentFormat = new DecimalFormat("######0.00");
        System.out.println("一共有" + total + "个字母。");
        System.out.println("各字母频率如下:");
        for (Integer letterIndex : order) {
            // With no letters at all, report 0% rather than dividing zero by zero (NaN).
            double percent = total == 0 ? 0.0 : counts[letterIndex] / (double) total * 100;
            System.out.println((char) ('a' + letterIndex) + ":" + percentFormat.format(percent) + "%");
        }
    }

    /**
     * Counts the letters a-z (case-insensitively) in the given file in a single pass.
     *
     * @param path file to read
     * @return array of 26 counts, index 0 for 'a' through index 25 for 'z'
     * @throws IOException if the file cannot be opened or read
     */
    private static int[] countLetters(String path) throws IOException {
        int[] counts = new int[ALPHABET_SIZE];
        try (FileInputStream in = new FileInputStream(path);
             InputStreamReader reader = new InputStreamReader(in, INPUT_CHARSET)) {
            char[] buffer = new char[8192];
            int read;
            // Loop until read() reports end of stream; ready() only says whether a read
            // would block and is not an end-of-file test.
            while ((read = reader.read(buffer)) != -1) {
                for (int i = 0; i < read; i++) {
                    char lower = ch(buffer[i]);
                    // ch() also shifts non-letters, so keep only results that are letters.
                    if (lower >= 'a' && lower <= 'z') {
                        counts[lower - 'a']++;
                    }
                }
            }
        }
        return counts;
    }

    /**
     * Shifts a character by 32 unless it is already a lowercase ASCII letter.
     * This maps 'A'..'Z' onto 'a'..'z'; any other character is shifted as well,
     * so callers must range-check the result before treating it as a letter.
     *
     * @param c character to convert
     * @return {@code c} if it is in 'a'..'z', otherwise {@code c + 32}
     */
    static char ch(char c) {
        if (!(c >= 97 && c <= 122)) {
            c += 32;
        }
        return c;
    }
}
二、统计单词的数量,并输出出现频率最高的前N个单词(N手动输入)
package org.yuan.HelloWorld;

import java.io.*;
import java.util.*;
import java.util.Map.Entry;

/**
 * Counts the words in a text file (case-insensitively) and prints the N most frequent
 * ones, where N is read from the console.
 */
public class TestFile2 {

    /** Number of top words to print; assigned from console input in {@link #main}. */
    public static int n = 0;

    /** Text file that is analysed. */
    private static final String INPUT_FILE = "d:/Harry Potter and the Sorcerer's Stone.txt";

    /** Charset used to decode the input; matches the one TestFile1 uses for the same file. */
    private static final String INPUT_CHARSET = "gbk";

    /**
     * Reads the input file, ranks its words by frequency, asks for N and prints the top N
     * words as {@code word:count} lines.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        try {
            String text = readText(INPUT_FILE);
            List<Map.Entry<String, Integer>> ranked = rankByFrequency(countWords(text));

            System.out.println("请输入N:");
            n = sc.nextInt();

            int printed = 0;
            for (Map.Entry<String, Integer> entry : ranked) {
                if (printed >= n) {
                    break;
                }
                System.out.println(entry.getKey() + ":" + entry.getValue());
                printed++;
            }
        } catch (FileNotFoundException e) {
            System.out.println("找不到指定文件");
        } catch (IOException e) {
            System.out.println("文件读取错误");
        }
    }

    /**
     * Reads the whole file into one string.
     *
     * @param path file to read
     * @return the file content with every line terminated by a newline
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(String path) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), INPUT_CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put a separator back so the last
                // word of one line does not fuse with the first word of the next.
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Splits the text into lower-cased words (runs of ASCII letters and digits) and counts them.
     *
     * @param text text to analyse
     * @return word counts, keyed by word in alphabetical order
     */
    private static Map<String, Integer> countWords(String text) {
        // Locale.ROOT keeps the case mapping independent of the machine's default locale.
        String[] tokens = text.toLowerCase(Locale.ROOT).split("[^a-zA-Z0-9]+");
        // TreeMap keeps words alphabetical, which decides the order of equally frequent words.
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String token : tokens) {
            // split() yields a leading empty token when the text starts with a delimiter.
            if (token.isEmpty()) {
                continue;
            }
            Integer previous = counts.get(token);
            counts.put(token, previous == null ? 1 : previous + 1);
        }
        return counts;
    }

    /**
     * Orders the word counts from most to least frequent.
     *
     * @param counts word counts to rank
     * @return entries sorted by descending count; the sort is stable, so ties keep the
     *         iteration order of {@code counts}
     */
    private static List<Map.Entry<String, Integer>> rankByFrequency(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> k1, Entry<String, Integer> k2) {
                // Compare in reverse so that larger counts sort first.
                return k2.getValue().compareTo(k1.getValue());
            }
        });
        return ranked;
    }
}