中文斷詞

出自DILA Wiki

中研院的中文斷詞系統提供了一個 API 介面可以呼叫，但網頁上的解說並不是很清楚。我在網路上找了一個例子，確定可以用：

注意：要使用 javac -encoding CP950 來 compile 這個檔，而且檔案必須以 "Big5" 編碼存檔。

import java.net.*; import java.io.*; import java.util.*;

/**
 * Minimal socket client that sends one hard-coded XML word-segmentation
 * request to a remote server and prints the server's reply to standard output.
 *
 * <p>The request and the reply are both transcoded with {@link #ENCODING}.
 * All of the work happens in the constructor; there is no other public API.
 */
public class Chinese {

    /** TCP port of the segmentation server. */
    static int port = 1501;
    /** Connection to the server; assigned in the constructor. */
    Socket socket;
    /** Stream the server's reply is read from. */
    InputStream instream;
    /** Stream the request is written to. */
    OutputStream outstream;
    /** Charset name used for both the outgoing request and the incoming reply. */
    static final String ENCODING = "BIG5";

    /**
     * Creates a new instance of Chinese: connects to the server, sends the
     * request, and echoes every line of the reply to {@code System.out}.
     *
     * <p>Any {@link IOException} is caught and its stack trace printed; it is
     * not propagated to the caller. The socket is closed whether or not the
     * exchange succeeds.
     */
    public Chinese() {
        // The XML request carries the options, the login and the text to segment.
        String str = "<?xml version=\"1.0\"?><wordsegmentation version=\"0.1\"><option showcategory=\"1\"/><authentication username=\"wahahung\" password=\"wahaman\"/><text>復有無量無數菩薩摩訶薩眾</text></wordsegmentation>";
        try {
            socket = new Socket(InetAddress.getByName("140.109.19.104"), port);
            try {
                instream = socket.getInputStream();
                outstream = socket.getOutputStream();

                outstream.write(str.getBytes(ENCODING));
                outstream.flush();

                BufferedReader r = new BufferedReader(new InputStreamReader(instream, ENCODING));
                String line;
                // readLine() returns null only at end of stream, so this loop
                // runs until the server closes its side of the connection.
                while ((line = r.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // Closing the socket also closes its input and output streams,
                // so nothing leaks when the exchange above fails part-way.
                socket.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the example once.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String args[]) {
        new Chinese();
    }

}