// todo: give options for document splitting. A line or the whole file or
  // sentence splitting as now
  public Iterator<List<IN>> getIterator(Reader r) {
    Tokenizer<IN> tokenizer = tokenizerFactory.getTokenizer(r);
    // PTBTokenizer.newPTBTokenizer(r, false, true);
    List<IN> words = new ArrayList<IN>();
    IN previous = tokenFactory.makeToken();
    StringBuilder prepend = new StringBuilder();

    /*
     * This changes SGML tags into whitespace -- it should maybe be moved
     * elsewhere
     */
    while (tokenizer.hasNext()) {
      IN w = tokenizer.next();
      String word = w.get(CoreAnnotations.TextAnnotation.class);
      Matcher m = sgml.matcher(word);
      if (m.matches()) {

        String before = StringUtils.getNotNullString(w.get(CoreAnnotations.BeforeAnnotation.class));
        String after = StringUtils.getNotNullString(w.get(CoreAnnotations.AfterAnnotation.class));
        prepend.append(before).append(word);
        String previousTokenAfter =
            StringUtils.getNotNullString(previous.get(CoreAnnotations.AfterAnnotation.class));
        previous.set(AfterAnnotation.class, previousTokenAfter + word + after);
        // previous.appendAfter(w.word() + w.after());
      } else {

        String before = StringUtils.getNotNullString(w.get(CoreAnnotations.BeforeAnnotation.class));
        if (prepend.length() > 0) {
          w.set(BeforeAnnotation.class, prepend.toString() + before);
          // w.prependBefore(prepend.toString());
          prepend = new StringBuilder();
        }
        words.add(w);
        previous = w;
      }
    }

    List<List<IN>> sentences = wts.process(words);
    String after = "";
    IN last = null;
    for (List<IN> sentence : sentences) {
      int pos = 0;
      for (IN w : sentence) {
        w.set(PositionAnnotation.class, Integer.toString(pos));
        after = StringUtils.getNotNullString(w.get(CoreAnnotations.AfterAnnotation.class));
        w.remove(AfterAnnotation.class);
        last = w;
      }
    }
    if (last != null) {
      last.set(AfterAnnotation.class, after);
    }

    return sentences.iterator();
  }
 @Override
 public Sequence<IString> process(String input) {
   String tokenizerInput = toUncased(input.trim());
   Tokenizer<CoreLabel> tokenizer = tf.getTokenizer(new StringReader(tokenizerInput));
   List<String> outputStrings = new LinkedList<>();
   while (tokenizer.hasNext()) {
     String string = tokenizer.next().get(TextAnnotation.class);
     outputStrings.add(string);
   }
   return IStrings.toIStringSequence(outputStrings);
 }
 public static void main(String[] args) throws IOException {
   Tokenizer<String> att = new ArabicTreebankTokenizer(new FileReader(args[0]));
   while (att.hasNext()) {
     System.out.print(att.next());
   }
 }