import java.util.*;
import java.io.*;
import java.text.*;

// Requires Java 5
/**
 * Counts how often each word occurs in a text file and prints the tally.
 *
 * <p>Each line is segmented with a Thai-locale word {@link BreakIterator}.
 * Every segment is trimmed, segments that are empty after trimming are
 * skipped, and the remaining segments (including punctuation-only ones) are
 * counted. The result is kept in a {@link TreeMap}, so it is printed in the
 * natural ordering of the word strings.
 */
public class WordCount {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_FILE = "2550.txt";

    /**
     * Reads the input file, counts its words and prints the resulting map
     * on standard output using the map's {@code toString()} form.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When absent, {@code "2550.txt"} in the working directory
     *             is used.
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String fn = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;

        Map<String, Integer> wordCount;
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that this matches the encoding of the input file.
        BufferedReader fi = new BufferedReader(new FileReader(fn));
        try {
            wordCount = countWords(fi);
        } finally {
            // Close the reader even when reading fails part-way through.
            fi.close();
        }
        System.out.println(wordCount.toString());
    }

    /** Tallies every non-blank word segment read from {@code in}, line by line. */
    private static Map<String, Integer> countWords(BufferedReader in) throws IOException {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        // One iterator is enough: setText() re-targets it at each new line.
        BreakIterator boundary = BreakIterator.getWordInstance(new Locale("th"));

        String line;
        while ((line = in.readLine()) != null) {
            boundary.setText(line);
            int start = boundary.first();
            int end = boundary.next();
            while (end != BreakIterator.DONE) {
                String word = line.substring(start, end).trim();
                if (word.length() > 0) {
                    Integer c = counts.get(word);
                    counts.put(word, Integer.valueOf(c == null ? 1 : c.intValue() + 1));
                }
                start = end;
                end = boundary.next();
            }
        }
        return counts;
    }
}
ปี 2540 มี 38334 คำ ปี 2550 มี 45719 คำ และความยาวของ LCS (longest common subsequence) ของทั้งสองไฟล์คือ 19668 คำ
(LCS คงบอกอะไรไม่ได้มาก เพราะถ้ามีการสลับมาตราหรือหมวด จะได้ความยาวที่สั้นลง แต่อย่างน้อยก็บอก lower bound ของส่วนร่วม ความจริงน่าจะแยกหาตามหมวดตามมาตรา แต่เริ่มง่วงนอน...)
import java.util.*;
import java.io.*;
import java.text.*;

/**
 * Computes the length of the longest common subsequence (LCS) of the word
 * sequences of two text files, {@code 2540.txt} and {@code 2550.txt}.
 *
 * <p>Words are obtained with a Thai-locale word {@link BreakIterator}; only
 * segments whose first character is a letter are kept.
 */
public class LLCS {

    /**
     * Loads both files, prints their word counts and then the LCS length.
     *
     * @param args ignored
     * @throws IOException if either file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String[] s2550 = getWords("2550.txt");
        String[] s2540 = getWords("2540.txt");
        System.out.println("2540 มี " + s2540.length + "คำ, 2550 มี " + s2550.length + "คำ");
        System.out.println("Length of LCS = " + llcs(s2540, s2550));
    }

    //-----------------------------------------------------
    /**
     * Reads a text file and returns its words in order of appearance.
     *
     * <p>Each line is split with a Thai-locale word iterator. A segment is
     * kept only if it is non-empty after trimming and starts with a letter,
     * so numbers and punctuation are dropped.
     *
     * @param fn path of the file to read
     * @return the words of the file, in reading order (possibly empty)
     * @throws IOException if the file cannot be opened or read
     */
    static String[] getWords(String fn) throws IOException {
        List<String> words = new ArrayList<String>();
        // One iterator is enough: setText() re-targets it at each new line.
        BreakIterator boundary = BreakIterator.getWordInstance(new Locale("th"));

        BufferedReader fi = new BufferedReader(new FileReader(fn));
        try {
            String line;
            while ((line = fi.readLine()) != null) {
                boundary.setText(line);
                int start = boundary.first();
                int end = boundary.next();
                while (end != BreakIterator.DONE) {
                    String word = line.substring(start, end).trim();
                    // Test the first code point rather than the first char so
                    // that letters outside the BMP are recognised as well.
                    if (word.length() > 0 && Character.isLetter(word.codePointAt(0))) {
                        words.add(word);
                    }
                    start = end;
                    end = boundary.next();
                }
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            fi.close();
        }
        return words.toArray(new String[0]);
    }

    //-----------------------------------------------------------
    /**
     * Returns the length of the longest common subsequence of two word
     * arrays, comparing elements with {@code equals}.
     *
     * <p>Only two rows of the dynamic-programming table are held at a time,
     * so the extra memory is proportional to {@code y.length}. The row index
     * is printed on standard output every 100 rows as a progress indicator.
     *
     * @param x first sequence; elements must be non-null
     * @param y second sequence
     * @return the LCS length; 0 when either array is empty
     */
    static int llcs(String[] x, String[] y) {
        // prev = row for the first i elements of x, curr = row being filled.
        // Java zero-initialises int arrays, so row 0 needs no explicit setup.
        int[] prev = new int[y.length + 1];
        int[] curr = new int[y.length + 1];

        for (int i = 0; i < x.length; i++) {
            curr[0] = 0;
            for (int j = 1; j <= y.length; j++) {
                if (x[i].equals(y[j - 1])) {
                    curr[j] = 1 + prev[j - 1];
                } else {
                    curr[j] = Math.max(prev[j], curr[j - 1]);
                }
            }
            if (i % 100 == 0) {
                System.out.println(i);
            }
            // Swap rows: the row just filled becomes "previous" for the next i.
            int[] t = prev;
            prev = curr;
            curr = t;
        }
        return prev[y.length];
    }
}