Java PorterStemmer.stem Examples

Programming Language: Java

Class/Type: PorterStemmer

Method/Function: stem

Examples at hotexamples.com: 2

Java PorterStemmer.stem - 2 examples found. These are the top rated real world Java examples of PorterStemmer.stem extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getStemmedTextAndSuffix(3)

stem(2)

add(1)

reset(1)

toString(1)

Example #1

Show file

File: PorterStemmer.java Project: cwinters/notem

  /**
   * Demonstration entry point for the stemmer. Each command-line argument is handled as one word:
   * its characters are added to the stemmer one by one, {@code stem()} is invoked, and the
   * stemmer's string form is printed on its own line of standard output.
   *
   * @param args the words to stem, one word per argument
   */
  public static void main(String[] args) {
    PorterStemmer stemmer = new PorterStemmer();

    for (String word : args) {
      // The stemmer accepts its input one character at a time.
      for (char ch : word.toCharArray()) {
        stemmer.add(ch);
      }
      stemmer.stem();
      System.out.println(stemmer.toString());
      // Reset the shared stemmer instance before handling the next argument.
      stemmer.reset();
    }
  }

Example #2

Show file

File: OkapiBM25.java Project: PraneethVellaboyana/Academic_Projects

  /**
   * Ranks documents against every query of a query file using the BM25 scoring formula.
   *
   * <p>The index file is read from {@code IntegerConstants.cur_dir + indexfile} and must contain
   * two serialized maps, in this order: the inverted index (term -> map of document -> occurrence
   * count) and the per-document token counts. The query file is read from {@code
   * IntegerConstants.cur_dir + queryfile}; each line is one query whose words are separated by
   * single spaces. Every query word is stemmed with {@code PorterStemmer} before it is looked up
   * in the inverted index.
   *
   * <p>For each query, at most {@code num} document keys are appended to the {@code queryResults}
   * field, in the iteration order of a map sorted with the {@code DescendingOrder} comparator
   * (presumably highest score first - confirm against that class). This method overwrites the
   * {@code inv_index} and {@code tokenCount} fields, uses {@code query_index} and {@code
   * documentScore} as per-query scratch space, and does not clear {@code queryResults} before
   * appending to it.
   *
   * <p>All streams opened here are closed before the method returns, including when an exception
   * is thrown.
   *
   * @param indexfile name of the serialized index file, relative to {@code IntegerConstants.cur_dir}
   * @param queryfile name of the query file, relative to {@code IntegerConstants.cur_dir}
   * @param num maximum number of results to collect per query, as a decimal integer string
   * @return the {@code queryResults} field after the results of all queries have been appended
   * @throws IOException if either file cannot be opened or read
   * @throws ClassNotFoundException if the index file references a class that cannot be loaded
   * @throws NumberFormatException if {@code num} is not a parsable integer
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  public List<String> getBestMatchingScore(String indexfile, String queryfile, String num)
      throws IOException, ClassNotFoundException {

    int count = Integer.parseInt(num);

    // NOTE(review): native Java deserialization executes code chosen by the file's contents;
    // this is only safe as long as the index file comes from a trusted source.
    FileInputStream fileIn = new FileInputStream(IntegerConstants.cur_dir + indexfile);
    try {
      ObjectInputStream in = new ObjectInputStream(fileIn);
      try {
        inv_index = (TreeMap) in.readObject();
        System.out.println(inv_index);

        tokenCount = (TreeMap) in.readObject();
        System.out.println(tokenCount);
      } finally {
        in.close();
      }
    } finally {
      // Also covers the case where constructing the ObjectInputStream itself failed.
      fileIn.close();
    }

    // Total number of tokens in the whole collection.
    int totalTokenCount = 0;
    for (Iterator i = tokenCount.entrySet().iterator(); i.hasNext(); ) {
      Map.Entry next = (Map.Entry) i.next();
      totalTokenCount = totalTokenCount + (Integer) next.getValue();
    }

    // Total number of documents and average document length.
    int N = tokenCount.size();
    double avdl = totalTokenCount * 1.0 / N;

    BufferedReader bf =
        new BufferedReader(new FileReader(new File(IntegerConstants.cur_dir + queryfile)));
    try {
      String querytext;
      while ((querytext = bf.readLine()) != null) {

        String[] querywords = querytext.split(" ");

        // Step 1: retrieve the inverted lists of all (stemmed) query terms.
        for (String word1 : querywords) {
          // A fresh stemmer per word, so no state carries over between words.
          PorterStemmer ps = new PorterStemmer();
          String word = ps.stem(word1).trim();
          if (!word.equals("") && inv_index.containsKey(word)) {
            query_index.put(word, inv_index.get(word));
          }
        }

        // Query-term frequency used in the BM25 query component. The previous code counted the
        // query words for which querytext.contains(word) holds; every element produced by
        // querytext.split(" ") is a substring of querytext, so that count always equals the
        // number of query words. The value is kept as-is so rankings do not change.
        // NOTE(review): standard BM25 uses the frequency of the current term within the query
        // here - confirm whether that was the intent.
        double qfi = querywords.length;
        double third_term = ((IntegerConstants.k2 + 1) * qfi / (IntegerConstants.k2 + qfi));

        // Step 2: compute BM25 scores for the documents in the retrieved lists.
        for (Iterator iterator1 = query_index.entrySet().iterator(); iterator1.hasNext(); ) {
          // Maps each document containing the term to the term's occurrence count in it.
          Map.Entry next = (Map.Entry) iterator1.next();
          TreeMap indexes = (TreeMap) next.getValue();

          // Number of documents the term occurs in.
          int ni = indexes.size();
          // The relevance-weighted IDF component depends only on the term, not on the document.
          double first_term =
              (Math.log(
                  ((IntegerConstants.ri + 0.5) / (IntegerConstants.R - IntegerConstants.ri + 0.5))
                      / ((ni - IntegerConstants.ri + 0.5)
                          / (N - ni - IntegerConstants.R + IntegerConstants.ri + 0.5))));

          for (Iterator iterator2 = indexes.entrySet().iterator(); iterator2.hasNext(); ) {
            Map.Entry next2 = (Map.Entry) iterator2.next();
            String docId = (String) next2.getKey();
            // Occurrences of the term in this document.
            int fi = (Integer) next2.getValue();

            // Document-length normalisation factor.
            double K =
                IntegerConstants.k1
                    * ((1 - IntegerConstants.b)
                        + IntegerConstants.b * (tokenCount.get(next2.getKey()) / avdl));
            double second_term = ((IntegerConstants.k1 + 1) * fi / (K + fi));
            Double total = first_term * second_term * third_term;

            // Accumulate the contributions of all query terms per document.
            if (documentScore.containsKey(docId)) {
              Double valueToPut = total + documentScore.get(docId);
              documentScore.put(docId, valueToPut);
            } else {
              documentScore.put(docId, total);
            }
          }
        }

        // Order the scored documents with the DescendingOrder comparator.
        DescendingOrder comp = new DescendingOrder((TreeMap) documentScore);
        TreeMap<String, Double> list_asc = new TreeMap<String, Double>(comp);
        list_asc.putAll(documentScore);

        // Collect at most `count` document keys for this query, preserving rank order.
        int rank = 1;
        for (Iterator itr = list_asc.entrySet().iterator(); itr.hasNext() && rank <= count; ) {
          Map.Entry nx = (Map.Entry) itr.next();
          queryResults.add(nx.getKey().toString());
          rank++;
        }

        // Reset the per-query scratch state before the next query.
        documentScore.clear();
        query_index.clear();
      }
    } finally {
      bf.close();
    }

    return queryResults;
  }