Esempio n. 1
0
/**
 * org.apache.spark.examples.WordsMapFunction
 *
 * <p>Flat-map function that turns one line of text into its words: the line is split on single
 * space characters, each token is trimmed, upper-cased and stripped of every non-letter
 * character, and tokens that end up empty are discarded.
 *
 * <p>User: Steve Date: 8/25/2014
 */
public class WordsMapFunction extends AbstractLoggingFlatMapFunction<String, String> {

  /** Token separator: exactly one space character. */
  private static final Pattern SPACE = Pattern.compile(" ");

  /**
   * Splits a line into normalized words and records the line length in the "TotalLetters"
   * accumulator.
   *
   * @param s the input line
   * @return the non-empty normalized words of {@code s}, in their original order
   */
  public Iterable<String> doCall(String s) {
    // NOTE(review): this adds the full length of the line (spaces and punctuation included),
    // not only the letters - confirm that this is the intended meaning of "TotalLetters".
    getAccumulators().incrementAccumulator("TotalLetters", s.length());
    List<String> ret = new ArrayList<String>();
    for (String token : SPACE.split(s)) {
      String word = regularizeString(token);
      // consecutive spaces and punctuation-only tokens normalize to "" and are skipped
      if (word.length() > 0) {
        ret.add(word);
      }
    }
    return ret;
  }

  /**
   * Returns a copy of {@code s} that keeps only its letters.
   *
   * <p>The string is walked by Unicode code point rather than by {@code char}, so letters outside
   * the Basic Multilingual Plane (encoded as surrogate pairs) are kept instead of being dropped.
   *
   * @param s the string to filter
   * @return the letters of {@code s}, in order; empty if {@code s} contains no letter
   */
  public static String dropNonLetters(String s) {
    StringBuilder sb = new StringBuilder(s.length());
    int i = 0;
    while (i < s.length()) {
      int codePoint = s.codePointAt(i);
      if (Character.isLetter(codePoint)) {
        sb.appendCodePoint(codePoint);
      }
      // advance by one or two chars depending on the width of the code point
      i += Character.charCount(codePoint);
    }
    return sb.toString();
  }

  /**
   * Normalizes a token: trims it, upper-cases it and removes every non-letter character.
   *
   * @param inp the raw token
   * @return the upper-cased letters of {@code inp}; empty if it contains no letter
   */
  public static String regularizeString(String inp) {
    // Locale.ROOT keeps the result independent of the JVM default locale
    // (e.g. under a Turkish locale "i" would otherwise become a dotted capital I).
    String upper = inp.trim().toUpperCase(java.util.Locale.ROOT);
    return dropNonLetters(upper);
  }
}
Esempio n. 2
0
 /**
  * Breaks a line into tokens with the SPACE pattern, passes each token through
  * regularizeString and collects the results that are not empty.
  *
  * @param s the input line
  * @return the non-empty regularized tokens, in their original order
  */
 public Iterable<String> doCall(String s) {
   // the accumulator receives the length of the whole line
   getAccumulators().incrementAccumulator("TotalLetters", s.length());
   List<String> words = new ArrayList<String>();
   for (String token : SPACE.split(s)) {
     String normalized = regularizeString(token);
     if (normalized.isEmpty()) {
       continue; // nothing left of this token
     }
     words.add(normalized);
   }
   return words;
 }