// not used anywhere...had written for checking purpose public void readAndStemDocument(boolean stem) { try { String text = ""; ArrayList<String> stemtoken = new ArrayList<String>(); String sCurrentLine; FileReader f = new FileReader(TrainDir + filename); BufferedReader br = new BufferedReader(f); while ((sCurrentLine = br.readLine()) != null) { text += " " + sCurrentLine; } f.close(); PorterStemmer stemmer = new PorterStemmer(); StringTokenizer str = new StringTokenizer(text); while (str.hasMoreTokens()) { String token = str.nextToken(); if (token == null) continue; token = token.replaceAll("[^a-z\\sA-Z\\-]", ""); if ("".equals(token)) continue; if (stem) { stemmer.setCurrent(token); stemmer.stem(); stemtoken.add(stemmer.getCurrent().toLowerCase()); // PorterStemmer // keywords.add(ls.getStem(token)); //LovinStemmer } else { stemtoken.add(token); // keywords.add(ls.getStem(token)); //LovinStemmer } } for (int i = 0; i < stemtoken.size(); i++) { document += " " + stemtoken.get(i); } } catch (IOException e) { e.printStackTrace(); } }
/**
 * Stems a single word.
 *
 * <p>A new {@code PorterStemmer} is created for every call; the word is handed
 * to it via {@code setCurrent}, {@code stem()} is invoked, and whatever
 * {@code getCurrent()} reports afterwards is returned.
 *
 * @param word the word to stem
 * @return the stemmer's current value after stemming {@code word}
 */
public String stem(String word) {
    final PorterStemmer porter = new PorterStemmer();
    porter.setCurrent(word);
    porter.stem();
    return porter.getCurrent();
}