コード例 #1
0
ファイル: JaccardScorer.java プロジェクト: lyhong508/DStc
 /**
  * Creates a scorer for one text file.
  *
  * <p>The file's lines are accumulated into {@code fileText} (each followed by a
  * newline), the accumulated text is handed to {@code getTokenSet} to populate
  * {@code words}, and finally {@code RemoveStopWords} is invoked with the stop-word
  * file name.
  *
  * <p>If the file cannot be opened or read, the exception message is printed to
  * standard output and construction continues with whatever text was read so far.
  *
  * @param fileName          path of the text file to load
  * @param stopWordsFileName file name passed on to {@code RemoveStopWords}
  */
 public JaccardScorer(String fileName, String stopWordsFileName) {
   this.fileName = fileName;

   // Copy the file into fileText one line at a time, re-adding the line terminator
   // that readLine() strips.
   try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
     for (String line = reader.readLine(); line != null; line = reader.readLine()) {
       fileText.append(line).append("\n");
     }
   } catch (IOException readFailure) {
     // Also covers FileNotFoundException (a subclass); both cases only report the message.
     System.out.println(readFailure.getMessage());
   }

   // Build the 'words' set from the loaded text using the token pattern below.
   String loadedText = fileText.toString();
   words = getTokenSet(loadedText, "[a-zA-Z0-9']+");

   // Stop-word handling is delegated to RemoveStopWords.
   RemoveStopWords(stopWordsFileName);
 }