中文斷詞
出自DILA Wiki
中研院的中文斷詞系統提供了一個可以呼叫的 API 介面,但網頁上的解說並不是很清楚。我在網路上找到了一個例子,確定可以使用:
注意:要使用 javac -encoding CP950 來編譯(compile)這個檔案,而且檔案必須以 "Big5" 編碼存檔。
import java.net.*;
import java.io.*;
import java.util.*;
// use javac -encoding CP950
/**
 * Minimal example client for a remote Chinese word-segmentation service.
 *
 * <p>Constructing an instance opens a TCP connection to a fixed host and
 * port, sends one hard-coded XML {@code <wordsegmentation>} request encoded
 * as {@link #ENCODING}, and prints every line of the reply to standard
 * output. The connection is always closed before the constructor returns.
 */
public class Chinese {

    /** TCP port the request is sent to. */
    static int port = 1501;

    /** Connection to the service; assigned in the constructor. */
    Socket socket;

    /** Raw input stream of {@link #socket}. */
    InputStream instream;

    /** Raw output stream of {@link #socket}. */
    OutputStream outstream;

    /** Charset name used both to encode the request and to decode the reply. */
    static final String ENCODING = "BIG5";

    /**
     * Creates a new instance of Chinese: sends the sample request and prints
     * the reply line by line to {@code System.out}.
     *
     * <p>Any {@link IOException} (connection failure, unsupported charset,
     * read/write error) is reported via {@code printStackTrace()} and not
     * rethrown.
     */
    public Chinese() {
        // NOTE(review): the credentials are embedded in the request literal;
        // consider supplying them from configuration instead of source code.
        String str = "<?xml version=\"1.0\"?><wordsegmentation version=\"0.1\"><option showcategory=\"1\"/><authentication username=\"wahahung\" password=\"wahaman\"/><text>復有無量無數菩薩摩訶薩眾</text></wordsegmentation>";
        try {
            socket = new Socket(InetAddress.getByName("140.109.19.104"), port);
            instream = socket.getInputStream();
            outstream = socket.getOutputStream();

            outstream.write(str.getBytes(ENCODING));
            outstream.flush();

            BufferedReader r = new BufferedReader(new InputStreamReader(instream, ENCODING));
            String line;
            // readLine() returns null at end of stream, i.e. once the peer
            // has closed its side of the connection.
            while ((line = r.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Closing the socket also closes its input and output streams,
            // so this releases everything even when an exception occurred.
            closeSocket(socket);
        }
    }

    /**
     * Closes the given socket if it was opened, reporting (not propagating)
     * a failure to close.
     *
     * @param s the socket to close; may be {@code null} when the connection
     *          was never established
     */
    private static void closeSocket(Socket s) {
        if (s == null) {
            return;
        }
        try {
            s.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the example once.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        new Chinese();
    }
}