将txt中的英语单词按词频排序(JAVA)
将txt中的英语单词按词频排序(JAVA)
长度为1的单词被忽略了
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Pattern;
/**
 * Ranks the English words of a UTF-8 text file by how often they occur.
 *
 * <p>The input is lower-cased, everything except the letters a-z, spaces and
 * hyphens is deleted, and the remaining text is split on single spaces.
 * Tokens of length one and tokens without any letter are ignored. The result
 * is written as one line per word: the word, padding up to a fixed column,
 * then the count. Words are ordered by descending count; words with equal
 * counts stay in alphabetical order.
 */
public class Main {
    /** Input file used when no input path is passed on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "E:\\Workplace\\CET_6_words_rank\\data\\cet6_words.txt";

    /** Output file used when no output path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH =
            "E:\\Workplace\\CET_6_words_rank\\data\\ending.txt";

    /** Column (0-based) at which the count starts for words shorter than this width. */
    private static final int COUNT_COLUMN = 40;

    /** Matches every character that is not a lower-case ASCII letter, a space or a hyphen. */
    private static final Pattern NON_WORD_CHARS = Pattern.compile("[^a-z \\-]");

    /**
     * Reads the input file, counts its words and writes the ranked report.
     *
     * @param args optional; {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path. Missing entries
     *             fall back to the built-in default paths.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Map<String, Integer> counts = countWords(read(inputPath));
        Map<String, Integer> ranked = sortByValueDescending(counts);
        writeFile1(outputPath, formatReport(ranked));
    }

    /** Counts occurrences of each word; the sorted map keeps keys in alphabetical order. */
    private static Map<String, Integer> countWords(List<String> words) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String word : words) {
            counts.merge(word, 1, Integer::sum);
        }
        return counts;
    }

    /** Turns each map entry into one report line, preserving the map's iteration order. */
    private static ArrayList<String> formatReport(Map<String, Integer> ranked) {
        ArrayList<String> report = new ArrayList<String>(ranked.size());
        for (Map.Entry<String, Integer> entry : ranked.entrySet()) {
            report.add(formatEntry(entry.getKey(), entry.getValue()));
        }
        return report;
    }

    /** Pads the word with spaces up to {@link #COUNT_COLUMN} (at least one space) and appends the count. */
    private static String formatEntry(String word, int count) {
        StringBuilder line = new StringBuilder(word);
        // do/while guarantees a separator even when the word already fills the column width.
        do {
            line.append(' ');
        } while (line.length() < COUNT_COLUMN);
        return line.append(count).toString();
    }

    /** Returns true when the token contains at least one lower-case ASCII letter. */
    private static boolean hasLetter(String token) {
        for (int i = 0; i < token.length(); i++) {
            char c = token.charAt(i);
            if (c >= 'a' && c <= 'z') {
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts the words of a UTF-8 text file in reading order.
     *
     * <p>Each line is lower-cased (locale-independently) and stripped of every
     * character other than a-z, space and hyphen. Only the space character
     * separates words; other characters, including tabs and punctuation, are
     * deleted, so the text on either side of them is joined. Tokens of length
     * one and tokens containing no letter (for example {@code --}) are skipped.
     *
     * @param path path of the file to read
     * @return the words found, duplicates included; if an I/O error occurs the
     *         stack trace is printed and the words collected so far are returned
     */
    public static ArrayList<String> read(String path) {
        ArrayList<String> words = new ArrayList<String>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Locale.ROOT keeps 'I' -> 'i' regardless of the machine's default locale.
                String lowered = line.toLowerCase(Locale.ROOT);
                String cleaned = NON_WORD_CHARS.matcher(lowered).replaceAll("");
                for (String token : cleaned.split(" ")) {
                    if (token.length() > 1 && hasLetter(token)) {
                        words.add(token);
                    }
                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        return words;
    }

    /**
     * Returns a copy of the map whose iteration order is by descending value.
     *
     * <p>The sort is stable, so entries with equal values keep the relative
     * order they had in the input map's iteration order.
     *
     * @param map the map to sort; it is not modified
     * @param <K> key type
     * @param <V> value type, compared by its natural ordering
     * @return a new insertion-ordered map holding the same entries, largest value first
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValueDescending(Map<K, V> map) {
        List<Map.Entry<K, V>> entries = new ArrayList<Map.Entry<K, V>>(map.entrySet());
        // Swapping the operands reverses the order without negating the comparison result.
        entries.sort((left, right) -> right.getValue().compareTo(left.getValue()));

        Map<K, V> result = new LinkedHashMap<K, V>();
        for (Map.Entry<K, V> entry : entries) {
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /**
     * Writes the given lines to a file as UTF-8, one per line, replacing any
     * existing content. Each line is terminated with the platform line separator.
     *
     * <p>If an I/O error occurs the stack trace is printed and the method returns.
     *
     * @param path  path of the file to write
     * @param lines the lines to write, in order
     */
    public static void writeFile1(String path, ArrayList<String> lines) {
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path), StandardCharsets.UTF_8))) {
            for (String line : lines) {
                writer.write(line);
                writer.newLine();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
Written on October 16, 2019