/**
 * Stems the text of every article handed out by {@code IDManager} and stores the stems.
 *
 * <p>Article IDs are fetched with {@code IDManager.getNextStemArticleID()} until that call
 * returns {@code 0}. For each ID the news text is loaded through {@code DBConnection}; an
 * article whose text is {@code null} is skipped without storing anything. Otherwise every
 * word of the text is stemmed into a fresh {@code Article}, which is then passed to
 * {@code DBConnection.storeNewsStems}.
 */
public void run() {
  int articleID = IDManager.getNextStemArticleID();
  // One stemmer is reused for all words of all articles.
  Stemmer stemmer = new Stemmer();

  while (articleID != 0) {
    String document = DBConnection.getInstance().getNewsText(articleID);
    if (document != null) {
      Article article = new Article(articleID);
      addStems(document, stemmer, article);
      DBConnection.getInstance().storeNewsStems(article);
    }
    articleID = IDManager.getNextStemArticleID();
  }
}

/**
 * Splits {@code document} into words, stems each one and records it on {@code article}.
 *
 * <p>A word is a maximal run of letters (per {@link Character#isLetter(char)}), lower-cased.
 * The characters {@code ; ! ? , ' ´ ` . :} are dropped before word splitting, so they do not
 * end a word: {@code "don't"} is treated as {@code "dont"}. Only the first 500 letters of a
 * word are passed to the stemmer.
 *
 * @param document text to tokenize; must not be {@code null}
 * @param stemmer stemmer that receives the letters of each word
 * @param article receiver of the resulting stems
 */
private void addStems(String document, Stemmer stemmer, Article article) {
  // Characters that are deleted from the text rather than treated as word separators.
  final String ignored = ";!?,'´`.:";
  // Upper bound on the number of letters of a single word handed to the stemmer.
  final int maxWordLength = 500;

  int wordLength = 0;
  for (int i = 0; i < document.length(); i++) {
    char ch = document.charAt(i);
    if (ignored.indexOf(ch) >= 0) {
      continue;
    }
    if (Character.isLetter(ch)) {
      if (wordLength < maxWordLength) {
        stemmer.add(Character.toLowerCase(ch));
        wordLength++;
      }
    } else if (wordLength > 0) {
      // First non-letter after a word: the word is complete.
      addStem(stemmer, article);
      wordLength = 0;
    }
  }
  // The document may end in the middle of a word, including a one-letter word.
  if (wordLength > 0) {
    addStem(stemmer, article);
  }
}

/**
 * Stems the word currently held by {@code stemmer} and adds the stem to {@code article}
 * when it is shorter than 255 characters; longer stems are discarded.
 *
 * <p>NOTE(review): this assumes {@code Stemmer.stem()} leaves the stemmer ready to accept
 * the next word, as reusing one stemmer across words requires - confirm against the
 * Stemmer implementation. The 255 limit is presumably a storage constraint - confirm.
 */
private void addStem(Stemmer stemmer, Article article) {
  stemmer.stem();
  String stem = stemmer.toString();
  if (stem.length() < 255) {
    article.addArticleFeatureInstance(stem);
  }
}
Beispiel #2
0
 /**
  * Creates a {@code PhySimObj} by delegating to the single-argument constructor with the
  * value returned by {@code IDManager.getNextID()}.
  */
 public PhySimObj() {
   this(IDManager.getNextID());
 }