中文斷詞
出自DILA Wiki
中研院的中文斷詞系統有提供一個可以呼叫的 API 介面,但網頁上的解說並不是很清楚。我在網路上找了一個例子,確定可以用:
注意:要使用 javac -encoding CP950 來編譯(compile)這個檔,而且檔案必須以 "Big5" 編碼存檔。
import java.net.*; import java.io.*; import java.util.*;
/**
 * Minimal example client for the Academia Sinica Chinese word-segmentation
 * service.
 *
 * <p>Constructing an instance opens a TCP connection to the service, sends a
 * single hard-coded XML request encoded as Big5, and prints every line of the
 * reply to standard output. I/O failures are reported on standard error and
 * are not propagated to the caller.
 */
public class Chinese {
    /** TCP port the client connects to. */
    static int port = 1501;

    /** Connection used by the constructor; already closed once the constructor returns. */
    Socket socket;

    /** Input side of {@link #socket}; closed together with the socket. */
    InputStream instream;

    /** Output side of {@link #socket}; closed together with the socket. */
    OutputStream outstream;

    /** Charset used both to encode the request and to decode the reply. */
    static final String ENCODING = "BIG5";

    /** Address of the segmentation server. */
    private static final String HOST = "140.109.19.104";

    // NOTE(review): credentials are embedded in source. Consider loading them
    // from configuration or the environment instead of committing them.
    private static final String USERNAME = "wahahung";
    private static final String PASSWORD = "wahaman";

    /** Sample sentence submitted for segmentation. */
    private static final String SAMPLE_TEXT = "復有無量無數菩薩摩訶薩眾";

    /**
     * Creates a new instance of Chinese: connects to the server, submits the
     * sample request, and echoes the reply line by line to standard output.
     *
     * <p>Any {@link IOException} (including an unsupported {@link #ENCODING})
     * is caught and its stack trace printed; the socket is closed in every
     * case.
     */
    public Chinese() {
        String request = buildRequest(USERNAME, PASSWORD, SAMPLE_TEXT);

        // try-with-resources guarantees the socket (and with it both of its
        // streams) is closed even when the write or a read throws.
        try (Socket connection = new Socket(InetAddress.getByName(HOST), port)) {
            socket = connection;
            instream = connection.getInputStream();
            outstream = connection.getOutputStream();

            outstream.write(request.getBytes(ENCODING));
            outstream.flush();

            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(instream, ENCODING));
            String line;
            // Read until the peer closes the connection (readLine returns null).
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the XML request document sent to the server.
     *
     * <p>The arguments are inserted verbatim, without XML escaping, so they
     * must not contain markup characters such as {@code <}, {@code &} or
     * {@code "}.
     *
     * @param username account name placed in the authentication element
     * @param password password placed in the authentication element
     * @param text     text to be segmented
     * @return the complete request document as a single-line string
     */
    private static String buildRequest(String username, String password, String text) {
        return "<?xml version=\"1.0\"?>"
                + "<wordsegmentation version=\"0.1\">"
                + "<option showcategory=\"1\"/>"
                + "<authentication username=\"" + username + "\" password=\"" + password + "\"/>"
                + "<text>" + text + "</text>"
                + "</wordsegmentation>";
    }

    /**
     * Runs the example once; all work happens in the constructor.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        new Chinese();
    }
}