コード例 #1
0
  /**
   * Reads the file at {@code TrainDir + filename}, normalizes its tokens and appends them to
   * {@code document}.
   *
   * <p>The file content is split on whitespace. Every character other than an ASCII letter,
   * whitespace or {@code '-'} is stripped from each token, and tokens left empty by that
   * stripping are dropped. When {@code stem} is true each token is run through a
   * {@code PorterStemmer} and the result is lower-cased; otherwise the token is kept as is
   * (original case included). Every surviving token is appended to {@code document}, preceded
   * by a single space.
   *
   * <p>An {@link IOException} raised while reading or closing the file is not propagated: its
   * stack trace is printed and {@code document} is left unchanged.
   *
   * <p>Note from the original author: this method is not used anywhere; it was written for
   * checking purposes.
   *
   * @param stem whether to stem (and lower-case) each token before appending it
   */
  public void readAndStemDocument(boolean stem) {
    try {
      StringBuilder text = new StringBuilder();
      BufferedReader reader = new BufferedReader(new FileReader(TrainDir + filename));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          text.append(' ').append(line);
        }
      } finally {
        // Release the file handle even when readLine() fails part-way through; closing the
        // BufferedReader also closes the wrapped FileReader.
        reader.close();
      }

      PorterStemmer stemmer = new PorterStemmer();
      StringBuilder appended = new StringBuilder();
      StringTokenizer tokens = new StringTokenizer(text.toString());
      while (tokens.hasMoreTokens()) {
        String token = tokens.nextToken().replaceAll("[^a-z\\sA-Z\\-]", "");
        if (token.isEmpty()) {
          continue;
        }
        if (stem) {
          stemmer.setCurrent(token);
          stemmer.stem();
          token = stemmer.getCurrent().toLowerCase();
        }
        appended.append(' ').append(token);
      }

      // Touch the field only when there is something to add, and do it with one concatenation
      // instead of re-copying the whole document once per token.
      if (appended.length() > 0) {
        document += appended.toString();
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
コード例 #2
0
 /**
  * Stems a single word using a freshly constructed {@code PorterStemmer}.
  *
  * @param word the word to stem; it is handed to the stemmer unchanged
  * @return the stemmer's current value after {@code stem()} has run
  */
 public String stem(String word) {
   PorterStemmer stemmer = new PorterStemmer();
   stemmer.setCurrent(word);
   stemmer.stem();
   return stemmer.getCurrent();
 }
コード例 #3
0
ファイル: IndexTest.java プロジェクト: BobPicWaver/ansj_seg
 /**
  * Smoke test: prints whatever {@code stem()} returns for a newly constructed
  * {@code PorterStemmer} that has not been given any input. Makes no assertions.
  */
 @Test
 public void poreterTest() {
   System.out.println(new PorterStemmer().stem());
 }