import java.util.*;
import java.io.*;
import java.text.*;
/**
 * Counts how often each word occurs in the text file {@code 2550.txt} and prints the
 * resulting word-to-count map to standard output.
 *
 * <p>Words are found with a Thai-locale word {@link BreakIterator}. Requires Java 5 or later
 * (generics and autoboxing).
 */
public class WordCount {

    /**
     * Reads {@code 2550.txt}, tallies its words and prints the tally via {@code Map.toString()}.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String fn = "2550.txt";
        // NOTE(review): FileReader decodes with the platform default charset; confirm that this
        // matches the encoding of the input file.
        BufferedReader fi = new BufferedReader(new FileReader(fn));
        Map<String, Integer> wordCount;
        try {
            wordCount = countWords(fi);
        } finally {
            // Close the file even when reading fails part-way through.
            fi.close();
        }
        System.out.println(wordCount.toString());
    }

    /**
     * Tallies every non-blank token that the Thai word break iterator finds in the input.
     *
     * <p>Each token is trimmed; tokens that are empty after trimming (for example runs of
     * spaces) are skipped. Punctuation and digit tokens are counted like any other token.
     *
     * @param in source of text, read line by line until end of stream; not closed by this method
     * @return a map from token to number of occurrences, sorted by token (a {@link TreeMap})
     * @throws IOException if reading from {@code in} fails
     */
    static Map<String, Integer> countWords(BufferedReader in) throws IOException {
        Map<String, Integer> wordCount = new TreeMap<String, Integer>();
        // One iterator serves every line: setText() re-targets it, so there is no need to
        // build a new instance per line.
        BreakIterator boundary = BreakIterator.getWordInstance(new Locale("th"));
        String line;
        while ((line = in.readLine()) != null) {
            boundary.setText(line);
            int start = boundary.first();
            int end = boundary.next();
            while (end != BreakIterator.DONE) {
                String word = line.substring(start, end).trim();
                if (word.length() > 0) {
                    Integer c = wordCount.get(word);
                    wordCount.put(word, Integer.valueOf(c == null ? 1 : c + 1));
                }
                start = end;
                end = boundary.next();
            }
        }
        return wordCount;
    }
}
ปี 2540 มี 38334 คำ ปี 2550 มี 45719 คำ และความยาวของ LCS (longest common subsequence) ของทั้งสองฉบับเท่ากับ 19668 คำ
(LCS คงบอกอะไรไม่ได้มาก เพราะถ้ามีการสลับมาตราหรือหมวด จะได้ความยาวที่สั้นลง แต่อย่างน้อยก็บอก lower bound ของส่วนร่วม ความจริงน่าจะแยกหาตามหมวดตามมาตรา แต่เริ่มง่วงนอน...)
import java.util.*;
import java.io.*;
import java.text.*;
/**
 * Compares the word sequences of {@code 2540.txt} and {@code 2550.txt} by computing the length
 * of their longest common subsequence (LCS).
 */
public class LLCS {

    /**
     * Loads both files as word arrays, prints their word counts and the length of their LCS.
     *
     * @param args ignored
     * @throws IOException if either file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String[] s2550 = getWords("2550.txt");
        String[] s2540 = getWords("2540.txt");
        System.out.println("2540 มี " + s2540.length + "คำ, 2550 มี " + s2550.length + "คำ");
        System.out.println("Length of LCS = " + llcs(s2540, s2550));
    }

    //-----------------------------------------------------
    /**
     * Reads the named file and returns its words in order of appearance.
     *
     * @param fn path of the text file to read
     * @return the tokens selected by {@link #readWords(BufferedReader)}
     * @throws IOException if the file cannot be opened or read
     */
    static String[] getWords(String fn) throws IOException {
        // NOTE(review): FileReader decodes with the platform default charset; confirm that this
        // matches the encoding of the input files.
        BufferedReader fi = new BufferedReader(new FileReader(fn));
        try {
            return readWords(fi);
        } finally {
            // Close the file even when reading fails part-way through.
            fi.close();
        }
    }

    /**
     * Splits the input into words with a Thai-locale word {@link BreakIterator}, keeping only
     * tokens whose first character is a letter (this drops whitespace, punctuation and numbers).
     *
     * @param in source of text, read line by line until end of stream; not closed by this method
     * @return the kept tokens, in order of appearance
     * @throws IOException if reading from {@code in} fails
     */
    static String[] readWords(BufferedReader in) throws IOException {
        List<String> words = new ArrayList<String>();
        // One iterator serves every line: setText() re-targets it, so there is no need to
        // build a new instance per line.
        BreakIterator boundary = BreakIterator.getWordInstance(new Locale("th"));
        String line;
        while ((line = in.readLine()) != null) {
            boundary.setText(line);
            int start = boundary.first();
            int end = boundary.next();
            while (end != BreakIterator.DONE) {
                String word = line.substring(start, end).trim();
                // Test the first code point rather than the first char so that a word starting
                // with a supplementary-plane letter is not rejected as a lone surrogate.
                if (word.length() > 0 && Character.isLetter(word.codePointAt(0))) {
                    words.add(word);
                }
                start = end;
                end = boundary.next();
            }
        }
        return words.toArray(new String[0]);
    }

    //-----------------------------------------------------------
    /**
     * Returns the length of the longest common subsequence of {@code x} and {@code y}, comparing
     * elements with {@code equals}.
     *
     * <p>Uses the row-by-row dynamic programme with only two rows of {@code y.length + 1} cells,
     * so time is proportional to {@code x.length * y.length}. The row index is printed to
     * standard output every 100 rows as a progress indicator.
     *
     * @param x first sequence; elements must be non-null
     * @param y second sequence
     * @return the LCS length; 0 when either sequence is empty
     */
    static int llcs(String[] x, String[] y) {
        // Java zero-fills new int arrays, so row "-1" (the empty prefix of x) is already all
        // zeros, and column 0 of both rows stays 0 because it is never written.
        int[] prev = new int[y.length + 1];
        int[] curr = new int[y.length + 1];
        for (int i = 0; i < x.length; i++) {
            for (int j = 1; j < curr.length; j++) {
                if (x[i].equals(y[j - 1])) {
                    curr[j] = 1 + prev[j - 1];
                } else {
                    curr[j] = Math.max(prev[j], curr[j - 1]);
                }
            }
            if (i % 100 == 0) System.out.println(i);
            // The finished row becomes "previous"; the old previous row is recycled as scratch.
            int[] t = prev;
            prev = curr;
            curr = t;
        }
        return prev[prev.length - 1];
    }
}