/**
 * Interactive console entry point: builds a tokenizer from the command-line
 * arguments, then tokenizes every line read from standard input and prints
 * one {@code surfaceForm<TAB>allFeatures} line per token.
 *
 * <p>Argument handling:
 * <ul>
 *   <li>one argument: {@code args[0]} is the name of a {@code Mode} constant
 *       (matched case-insensitively);</li>
 *   <li>two arguments: as above, and {@code args[1]} is handed to the
 *       builder's {@code userDictionary(...)} (presumably a dictionary file
 *       path; confirm against the builder's documentation);</li>
 *   <li>any other argument count: a tokenizer with default settings is
 *       built.</li>
 * </ul>
 *
 * @param args optional mode name and user-dictionary argument, see above
 * @throws IOException if reading standard input fails, or if building the
 *         tokenizer reports an I/O problem
 * @throws IllegalArgumentException if {@code args[0]} does not name a
 *         {@code Mode} constant
 */
public static void main(String[] args) throws IOException {
    Tokenizer tokenizer;
    if (args.length == 1 || args.length == 2) {
        // Upper-case with Locale.ROOT so the enum lookup does not depend on the
        // JVM's default locale (e.g. Turkish maps 'i' to a dotted capital I).
        Mode mode = Mode.valueOf(args[0].toUpperCase(java.util.Locale.ROOT));
        if (args.length == 2) {
            tokenizer = Tokenizer.builder().mode(mode).userDictionary(args[1]).build();
        } else {
            tokenizer = Tokenizer.builder().mode(mode).build();
        }
    } else {
        tokenizer = Tokenizer.builder().build();
    }

    System.out.println("Tokenizer ready.  Provide input text and press RET.");

    // NOTE(review): InputStreamReader decodes with the platform default charset
    // here; left unchanged on purpose since the console encoding is not known.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
        String line;
        // readLine() returns null at end of input, which ends the session.
        while ((line = reader.readLine()) != null) {
            List<Token> result = tokenizer.tokenize(line);
            for (Token token : result) {
                System.out.println(token.getSurfaceForm() + "\t" + token.getAllFeatures());
            }
        }
    }
}
/**
 * Demo entry point: tokenizes a fixed set of sample sentences with a
 * default-configured tokenizer and prints, for each sentence, a separator
 * line, the sentence itself, and then one
 * {@code surfaceForm<TAB>allFeatures} line per token.
 *
 * <p>Earlier experiments that used to live here as commented-out code also
 * dumped other {@code Token} accessors (part of speech, position, reading,
 * the feature array, and the known/unknown/user flags) and joined the
 * comma-separated feature string with {@code |}; they are not executed.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final String[] samples = {
        "すもももももももものうち。",
        "メガネは顔の一部です。",
        "日本経済新聞でモバゲーの記事を読んだ。",
        "Java, Scala, Groovy, Clojure",
        "LUCENE、SOLR、Lucene, Solr",
        "アイウエオカキクケコさしすせそABCXYZ123456",
        "Lucene is a full-featured text search engine library written in Java."
    };

    Tokenizer tokenizer = Tokenizer.builder().build();

    for (String sample : samples) {
        // Header for this sample: separator line followed by the raw text.
        System.out.println("=======================");
        System.out.println(sample);

        List<Token> parsed = tokenizer.tokenize(sample);
        for (Token piece : parsed) {
            String row = piece.getSurfaceForm() + "\t" + piece.getAllFeatures();
            System.out.println(row);
        }
    }
}