Beispiel #1
0
  /**
   * Reads the next line from {@code reader} and parses it into {@code docData}.
   *
   * <p>Line selection and ID assignment happen under this object's monitor; the parsing of the
   * selected line happens after the monitor is released.
   *
   * @param docData instance to clear and populate; the same instance is returned
   * @return {@code docData}, with its ID set to the value of {@code readCount} taken for this line
   * @throws NoMoreDataException if the input is exhausted and {@code forever} is false, or if the
   *     input was reset during this call and still produced no line to parse
   * @throws IOException if reading the underlying input fails
   */
  @Override
  public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
    String line;
    int myID;

    synchronized (this) {
      // Iterate instead of recursing: an input that never yields a data line would otherwise
      // recurse without bound when 'forever' is set.
      boolean reopened = false;
      while (true) {
        line = reader.readLine();
        if (line == null) {
          // A second end-of-input within the same call means a full pass produced nothing.
          if (!forever || reopened) {
            throw new NoMoreDataException();
          }
          // Reset the file
          openFile();
          reopened = true;
          continue;
        }
        if (docDataLineReader == null) { // first line ever, one time initialization
          docDataLineReader = createDocDataLineReader(line);
          if (skipHeaderLine) {
            // This line only served to create the line reader; move on to the next one.
            continue;
          }
        }
        // increment IDs only once per returned document
        myID = readCount++;
        break;
      }
    }

    // Parsing happens outside the lock; 'line' and 'myID' are private to this call.
    docData.clear();
    docData.setID(myID);
    docDataLineReader.parseLine(docData, line);
    return docData;
  }
 /**
  * Populates {@code docData} from the next tuple returned by {@code parser.next()}.
  *
  * <p>The tuple is indexed with the {@code ID}, {@code BODY}, {@code DATE} and {@code TITLE}
  * constants; the {@code ID} entry is stored as the document name.
  *
  * @param docData instance to clear and populate; the same instance is returned
  * @return {@code docData} after it has been filled in
  */
 @Override
 public synchronized DocData getNextDocData(DocData docData)
     throws NoMoreDataException, IOException {
   // Fetch the tuple first so that docData is only cleared once a tuple is in hand.
   final String[] fields = parser.next();

   docData.clear();

   docData.setName(fields[ID]);
   docData.setBody(fields[BODY]);
   docData.setDate(fields[DATE]);
   docData.setTitle(fields[TITLE]);

   return docData;
 }
  /**
   * Reads the next input file and loads its contents into {@code docData}.
   *
   * <p>The file is read as UTF-8. Its first line is taken as the date and its third line as the
   * title; the second and fourth lines are skipped, and every remaining line is appended to the
   * body followed by a single space. The document name is the file's canonical path followed by
   * {@code "_"} and the current iteration number.
   *
   * @param docData instance to clear and populate; the same instance is returned
   * @return {@code docData} after it has been filled in
   * @throws NoMoreDataException if no input files remain and {@code forever} is false
   * @throws IOException if the file cannot be opened or read
   */
  @Override
  public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
    final File f;
    final String name;
    // Only picking the next file touches shared state; the file is read outside the lock.
    synchronized (this) {
      if (!inputFiles.hasNext()) {
        // exhausted files, start a new round, unless forever set to false.
        if (!forever) {
          throw new NoMoreDataException();
        }
        inputFiles = new Iterator(dataDir);
        iteration++;
      }
      f = inputFiles.next();
      name = f.getCanonicalPath() + "_" + iteration;
    }

    final String dateStr;
    final String title;
    final StringBuilder bodyBuf = new StringBuilder(1024);
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
    try {
      // First line is the date, 3rd is the title, rest is body
      dateStr = reader.readLine();
      reader.readLine(); // skip an empty line
      title = reader.readLine();
      reader.readLine(); // skip an empty line
      String line;
      while ((line = reader.readLine()) != null) {
        bodyBuf.append(line).append(' ');
      }
    } finally {
      // Close on the failure path too, so a read error does not leak the file handle.
      reader.close();
    }
    addBytes(f.length());

    Date date = parseDate(dateStr);

    docData.clear();
    docData.setName(name);
    docData.setBody(bodyBuf.toString());
    docData.setTitle(title);
    docData.setDate(date);
    return docData;
  }