/**
 * Reads the next line from the underlying line stream and returns the name it starts with.
 *
 * <p>The name is the text before the first space character on the line. The data is expected
 * to be in ALL CAPS, so the name is converted back to mixed case: the first letter is upper
 * case and the rest lower case. Names longer than two characters that start with {@code "MC"}
 * additionally get their third letter capitalised (e.g. {@code MCDONALD} becomes
 * {@code McDonald}).
 *
 * @return a {@code StringList} holding the single converted name, or {@code null} when the
 *     stream returned {@code null}, the line is empty, the line contains no space, or the
 *     line starts with a space (i.e. the name token is empty)
 * @throws IOException if reading from the underlying line stream fails
 */
public StringList read() throws IOException {
    String line = lineStream.read();
    StringList name = null;

    if ((line != null) && (!StringUtil.isEmpty(line))) {
      // find the location of the name separator in the line of data.
      int pos = line.indexOf(' ');
      // pos == 0 would yield an empty token, and substring(0, 1) on it would throw
      // StringIndexOutOfBoundsException; treat it like a line without a separator.
      if (pos > 0) {
        String parsed = line.substring(0, pos);
        String name2;
        // the data is in ALL CAPS ... so the easiest way is to convert
        // back to standard mixed case.
        if ((parsed.length() > 2) && (parsed.startsWith("MC"))) {
          name2 =
              parsed.substring(0, 1).toUpperCase(locale)
                  + parsed.substring(1, 2).toLowerCase(locale)
                  + parsed.substring(2, 3).toUpperCase(locale)
                  + parsed.substring(3).toLowerCase(locale);
        } else {
          name2 =
              parsed.substring(0, 1).toUpperCase(locale) + parsed.substring(1).toLowerCase(locale);
        }
        name = new StringList(new String[] {name2});
      }
    }

    return name;
  }
  /**
   * Reads the next non-empty sentence from the underlying line stream and converts its
   * per-token entity tags into name spans.
   *
   * <p>Each line of a sentence must hold exactly two tab-separated fields: the token and its
   * tag. A sentence ends at an empty line or at the end of the stream. Tags must be
   * {@code "O"} or start with {@code "B-"} or {@code "I-"}. Runs of empty lines between
   * sentences are skipped.
   *
   * <p>The clear-adaptive-data flag of the returned sample is set when {@code clearFeatures}
   * is {@code "yes"} (ignoring case), or when {@code clearFeatures} is {@code "docstart"}
   * (ignoring case) and a line starting with {@code -DOCSTART-} was consumed before the
   * sentence, even if additional empty lines separate the marker from the sentence.
   *
   * @return the next sample, or {@code null} when the stream has no more lines
   * @throws IOException if reading fails, a line does not have exactly two fields, the line
   *     after a {@code -DOCSTART-} marker is not empty, or a tag is not recognised
   */
  public NameSample read() throws IOException {

    // Kept outside the loop so a -DOCSTART- marker is not forgotten when it is followed by
    // more than one empty line before the next sentence starts.
    boolean isClearAdaptiveData = false;

    // Loop (rather than recurse) over empty sentences so that a long run of blank lines in
    // the input cannot exhaust the call stack.
    while (true) {
      List<String> tokens = new ArrayList<String>();
      List<String> neTypes = new ArrayList<String>();

      // Empty line indicates end of sentence
      String line;
      while ((line = lineStream.read()) != null && !StringUtil.isEmpty(line)) {
        // clear adaptive data if document mark appears following
        // CoNLL03 conventions
        if (clearFeatures.equalsIgnoreCase("docstart") && line.startsWith("-DOCSTART-")) {
          isClearAdaptiveData = true;
          String emptyLine = lineStream.read();
          // A null here means the stream ended right after the marker; there is nothing
          // left to validate and the next read ends the loop.
          if (emptyLine != null && !StringUtil.isEmpty(emptyLine)) {
            throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine + "'!");
          }
          continue;
        }
        String[] fields = line.split("\t");
        if (fields.length != 2) {
          throw new IOException(
              "Expected two fields per line in training data, got "
                  + fields.length
                  + " for line '"
                  + line
                  + "'!");
        }
        tokens.add(fields[0]);
        neTypes.add(fields[1]);
      }

      // if no -DOCSTART- mark, check if we need to clear features every sentence
      if (clearFeatures.equalsIgnoreCase("yes")) {
        isClearAdaptiveData = true;
      }

      if (tokens.isEmpty()) {
        if (line == null) {
          // source stream is not returning anymore lines
          return null;
        }
        // Just filter out empty events, if two lines in a row are empty
        continue;
      }

      // convert name tags into spans; extract(...) is defined elsewhere in this class and
      // is handed the begin index, the exclusive end index and the tag of the first token.
      List<Span> names = new ArrayList<Span>();

      int beginIndex = -1;
      int endIndex = -1;
      for (int i = 0; i < neTypes.size(); i++) {
        String neTag = neTypes.get(i);
        if (neTag.equals("O")) {
          // O closes any entity that is currently open.
          if (beginIndex != -1) {
            names.add(extract(beginIndex, endIndex, neTypes.get(beginIndex)));
            beginIndex = -1;
            endIndex = -1;
          }
        } else if (neTag.startsWith("B-")) {
          // B- always starts a new entity, closing the one directly before it (if any).
          if (beginIndex != -1) {
            names.add(extract(beginIndex, endIndex, neTypes.get(beginIndex)));
          }
          beginIndex = i;
          endIndex = i + 1;
        } else if (neTag.startsWith("I-")) {
          // I- starts or continues a current name entity
          if (beginIndex == -1) {
            beginIndex = i;
            endIndex = i + 1;
          } else if (!neTag.endsWith(neTypes.get(beginIndex).substring(1))) {
            // we have a new tag type following a tagged word series
            // also may not have the same I- starting the previous!
            names.add(extract(beginIndex, endIndex, neTypes.get(beginIndex)));
            beginIndex = i;
            endIndex = i + 1;
          } else {
            endIndex++;
          }
        } else {
          throw new IOException("Invalid tag: " + neTag);
        }
      }

      // if one span remains, create it here
      if (beginIndex != -1) {
        names.add(extract(beginIndex, endIndex, neTypes.get(beginIndex)));
      }

      return new NameSample(
          tokens.toArray(new String[tokens.size()]),
          names.toArray(new Span[names.size()]),
          isClearAdaptiveData);
    }
  }