Exemplo n.º 1
0
 /** Verifies that reset() rewinds to the position recorded by mark(), not to the start. */
 @Test
 public void testResetReturnsToMarkedPositionOfTextWithMark() throws IOException {
   // Consume one character, then record the current position.
   _textReader.read();
   _textReader.mark(0);

   // Rewind to the recorded position and inspect the next character read.
   _textReader.reset();
   final char afterReset = (char) _textReader.read();

   assertEquals("First Value After Reset Is e", 'e', afterReset);
 }
  /**
   * Reads the next batch of records from the underlying reader.
   *
   * <p>Records are parsed one at a time until either {@code MAX_RECORDS_PER_BATCH} have been read
   * or {@code reader.parseNext()} reports that nothing more was parsed. An {@code IOException} or
   * {@code TextParsingException} raised while parsing is rethrown as a data-read
   * {@code UserException} carrying the file path and the reader position.
   *
   * @return the number of records parsed into this batch
   */
  @Override
  public int next() {
    reader.resetForNextBatch();

    try {
      int parsed = 0;
      for (; parsed < MAX_RECORDS_PER_BATCH; parsed++) {
        if (!reader.parseNext()) {
          // Nothing more was parsed; leave the count at the records read so far.
          break;
        }
      }
      reader.finishBatch();
      return parsed;
    } catch (IOException | TextParsingException e) {
      throw UserException.dataReadError(e)
          .addContext(
              "Failure while reading file %s. Happened at or shortly before byte position %d.",
              split.getPath(), reader.getPos())
          .build(logger);
    }
  }
 /**
  * Releases the reader once all records have been processed. Closing the reader is expected to
  * close the input stream it reads from as well.
  *
  * <p>Does nothing when no reader is held. An {@code IOException} raised while closing is logged
  * as a warning rather than propagated; in that case the reader reference is left in place.
  */
 @Override
 public void close() {
   if (reader == null) {
     return;
   }

   try {
     reader.close();
     reader = null;
   } catch (IOException e) {
     logger.warn("Exception while closing stream.", e);
   }
 }
  /**
   * Extracts the header (field names) from the text file.
   *
   * <p>The header is currently assumed to be the first line of the file when header extraction is
   * enabled. The file is always read from offset 0, irrespective of the split assigned to this
   * reader, using a dedicated output mutator so that the header row does not reshape the query
   * output.
   *
   * <p>All resources opened for the header read (reader or raw stream, and the header output
   * mutator) are released even when reading fails. On success the settings are switched to skip
   * the first line, so the subsequent data read does not return the header row as a record.
   *
   * <p>TODO: enhance to support more common header patterns.
   *
   * @return field name strings taken from the first parsed row
   * @throws SchemaChangeException propagated from building the header output
   * @throws IOException if opening, reading or closing the header stream fails
   * @throws ExecutionSetupException propagated from the header setup calls
   */
  private String[] extractHeader()
      throws SchemaChangeException, IOException, ExecutionSetupException {
    assert (settings.isHeaderExtractionEnabled());
    assert (oContext != null);

    // The header itself is the first line, so it must not be skipped during this read.
    settings.setSkipFirstLine(false);

    // Use a separate output mutator to avoid reshaping query output with header data.
    HeaderOutputMutator hOutputMutator = new HeaderOutputMutator();
    InputStream hStream = null;
    TextReader headerReader = null;
    String[] fieldNames;
    try {
      RepeatedVarCharOutput hOutput =
          new RepeatedVarCharOutput(hOutputMutator, getColumns(), true);
      this.allocate(hOutputMutator.fieldVectorMap);

      // Read the file header irrespective of the split given to this reader.
      hStream = dfs.openPossiblyCompressedStream(split.getPath());
      TextInput hInput =
          new TextInput(
              settings, hStream, oContext.getManagedBuffer(READ_BUFFER), 0, split.getLength());

      headerReader =
          new TextReader(settings, hInput, hOutput, oContext.getManagedBuffer(WHITE_SPACE_BUFFER));
      this.reader = headerReader;
      headerReader.start();

      // Extract the first row only and grab the field names from the output.
      headerReader.parseNext();
      fieldNames = hOutput.getTextOutput();
    } finally {
      try {
        if (headerReader != null) {
          // NOTE(review): closing the reader is expected to close hStream too - confirm.
          headerReader.close();
          // Do not leave the field pointing at a closed reader; close() would close it again.
          this.reader = null;
        } else if (hStream != null) {
          // The reader was never built, so the stream has to be closed directly.
          hStream.close();
        }
      } finally {
        hOutputMutator.close();
      }
    }

    // Skip the first line the next time the input is read.
    settings.setSkipFirstLine(true);

    return fieldNames;
  }
Exemplo n.º 5
0
 /**
  * Runs the two training phases, each on its own fixed-size thread pool of {@code NTHREADS}.
  *
  * <p>Phase one submits a {@code FreRunnable} for every index in {@code [0, txt_rdr.getSize())}.
  * Phase two starts only after phase one has fully terminated and submits a
  * {@code DictRunnable} for every string returned by {@code f_rec.getRecordedStrings()}. The
  * method returns once phase two has terminated as well.
  *
  * <p>Waiting blocks on the executor instead of spinning, and each pool is shut down even if
  * task submission fails.
  */
 public void train() {
   int n = txt_rdr.getSize();
   assert (n == ans.size() && n != 0);

   ExecutorService fre_executor = Executors.newFixedThreadPool(NTHREADS);
   try {
     for (int i = 0; i < n; i++) {
       fre_executor.execute(new FreRunnable(i));
     }
   } finally {
     fre_executor.shutdown();
   }
   awaitCompletion(fre_executor);

   // Create the second pool only once it is needed, so no idle pool exists during phase one.
   ExecutorService dict_executor = Executors.newFixedThreadPool(NTHREADS);
   try {
     for (String s : f_rec.getRecordedStrings()) {
       dict_executor.execute(new DictRunnable(s));
     }
   } finally {
     dict_executor.shutdown();
   }
   awaitCompletion(dict_executor);
 }

 /**
  * Blocks until every task of an already shut-down executor has finished.
  *
  * <p>An interrupt does not cut the wait short; it is remembered and the thread's interrupt
  * status is restored before returning.
  *
  * @param executor executor on which {@code shutdown()} has already been called
  */
 private void awaitCompletion(ExecutorService executor) {
   boolean interrupted = false;
   try {
     while (true) {
       try {
         if (executor.awaitTermination(
             Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS)) {
           return;
         }
       } catch (InterruptedException e) {
         // Keep waiting for the tasks; the interrupt is re-asserted in the finally block.
         interrupted = true;
       }
     }
   } finally {
     if (interrupted) {
       Thread.currentThread().interrupt();
     }
   }
 }
  /**
   * Prepares this record reader for use: obtains the managed buffers, builds the output that
   * writes into the record batch, opens the input stream for the split and starts the text reader.
   *
   * <p>When header extraction is enabled the header is read first and its field names drive a
   * per-field output; otherwise a single repeated output is used.
   *
   * @param context operator context from which buffers are allocated and managed
   * @param outputMutator used to create the schema in the output record batch
   * @throws ExecutionSetupException if a {@code SchemaChangeException} or {@code IOException}
   *     occurs while setting up the reader
   */
  @Override
  public void setup(OperatorContext context, OutputMutator outputMutator)
      throws ExecutionSetupException {

    oContext = context;
    readBuffer = context.getManagedBuffer(READ_BUFFER);
    whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);

    try {
      // Output: chosen by whether the file's header supplies the field names.
      final TextOutput output;
      if (!settings.isHeaderExtractionEnabled()) {
        // No header: simply use a RepeatedVarCharVector-backed output.
        output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery());
      } else {
        // Header present: extract it and use the names to set up the VarCharVectors.
        final String[] fieldNames = extractHeader();
        output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery());
      }

      // Input: bounded to the byte range [start, start + length) of this split.
      final InputStream stream = dfs.openPossiblyCompressedStream(split.getPath());
      final TextInput input =
          new TextInput(
              settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());

      // Reader: ties the input and output together.
      reader = new TextReader(settings, input, output, whitespaceBuffer);
      reader.start();

    } catch (SchemaChangeException | IOException e) {
      throw new ExecutionSetupException(
          String.format("Failure while setting up text reader for file %s", split.getPath()), e);
    } catch (IllegalArgumentException e) {
      // Reported as a user-facing data read error with the offending file path attached.
      throw UserException.dataReadError(e)
          .addContext("File Path", split.getPath().toString())
          .build(logger);
    }
  }
Exemplo n.º 7
0
 /**
  * Loads the training data: first the answer labels from {@code filenameA}, then the text from
  * {@code filenameT} via {@code txt_rdr.readText}.
  *
  * <p>Each line of the answer file appends one entry to {@code ans}: {@code true} when the
  * trimmed line equals {@code "P"} (also incrementing {@code ans_pos}), otherwise {@code false}
  * (incrementing {@code ans_neg}).
  *
  * <p>The answer file is opened with try-with-resources so it is closed even if reading fails.
  * An {@code IOException} is reported via its stack trace and not propagated.
  */
 public void readTrainingData() {
   try {
     // readAnswer
     System.out.println("Accessing " + filenameA);
     try (BufferedReader br = new BufferedReader(new FileReader(filenameA))) {
       for (String line = br.readLine(); line != null; line = br.readLine()) {
         if (line.trim().equals("P")) {
           ans.add(true);
           ans_pos += 1;
         } else {
           ans.add(false);
           ans_neg += 1;
         }
       }
     }
     // readText
     txt_rdr.readText(filenameT);
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
Exemplo n.º 8
0
 /** Verifies that reset() with no prior mark() rewinds so that the next read yields 'T'. */
 @Test
 public void testResetReturnsToBeginningOfTextWithoutMark() throws IOException {
   // Advance by one character without marking, then rewind.
   _textReader.read();
   _textReader.reset();

   final char afterReset = (char) _textReader.read();

   assertEquals("First Value After Reset Is T", 'T', afterReset);
 }
Exemplo n.º 9
0
 /** Verifies that calling ready() after close() raises an IOException. */
 @Test(expected = IOException.class)
 public void testReadyThrowsExceptionIfClosed() throws IOException {
   _textReader.close();
   // The expected IOException must come from this call on the closed reader.
   _textReader.ready();
 }
Exemplo n.º 10
0
 /** Verifies that the reader under test reports itself as ready. */
 @Test
 public void testReady() throws IOException {
   final boolean ready = _textReader.ready();

   assertTrue("CharSequenceReader is Ready!", ready);
 }
Exemplo n.º 11
0
 /** Verifies that reading into a CharBuffer wrapping a 4-char array fills it with "Test". */
 @Test
 public void testReadToAppendable() throws IOException {
   final char[] array = new char[4];
   final CharBuffer target = CharBuffer.wrap(array);

   // The buffer wraps the array, so characters read into it land in the array.
   _textReader.read(target);

   final String contents = String.valueOf(array);
   assertEquals("TextBuilder Should Contain Test", "Test", contents);
 }
Exemplo n.º 12
0
 /** Verifies that the first single-character read returns 'T'. */
 @Test
 public void testRead() throws IOException {
   final char first = (char) _textReader.read();

   assertEquals("Read Value is T", "T", String.valueOf(first));
 }
Exemplo n.º 13
0
 /** Verifies that mark() rejects a negative argument with an IllegalArgumentException. */
 @Test(expected = IllegalArgumentException.class)
 public void testMarkWithNegativeArgumentThrowsException() throws IOException {
   _textReader.mark(-1);
 }
Exemplo n.º 14
0
 /** Verifies that the reader under test advertises support for mark(). */
 @Test
 public void testMarkSupported() {
   final boolean supported = _textReader.markSupported();

   assertTrue("Mark Is Supported", supported);
 }