/**
 * Verifies that {@code reset()} rewinds to the position recorded by {@code mark(int)}: one
 * character is consumed, the position is marked, and after a reset the next character read is
 * expected to be 'e'.
 */
@Test
public void testResetReturnsToMarkedPositionOfTextWithMark() throws IOException {
  // Consume one character first so the mark is not taken at the very start of the text.
  _textReader.read();
  _textReader.mark(0);
  _textReader.reset();

  final char firstAfterReset = (char) _textReader.read();
  assertEquals("First Value After Reset Is e", 'e', firstAfterReset);
}
/**
 * Produces the next batch of records.
 *
 * Rows are parsed one at a time until either {@code MAX_RECORDS_PER_BATCH} rows have been
 * counted or {@code reader.parseNext()} returns {@code false}. The batch is then finished on the
 * reader. An {@link IOException} or {@code TextParsingException} raised while parsing is
 * rethrown as a data-read {@code UserException} carrying the file path and reader position.
 *
 * @return number of records in the batch
 */
@Override
public int next() {
  reader.resetForNextBatch();
  try {
    int recordCount = 0;
    for (; recordCount < MAX_RECORDS_PER_BATCH; recordCount++) {
      // Stop without counting when the reader has no further row to parse.
      if (!reader.parseNext()) {
        break;
      }
    }
    reader.finishBatch();
    return recordCount;
  } catch (IOException | TextParsingException e) {
    throw UserException.dataReadError(e)
        .addContext(
            "Failure while reading file %s. Happened at or shortly before byte position %d.",
            split.getPath(), reader.getPos())
        .build(logger);
  }
}
/**
 * Releases the reader once all records have been processed.
 *
 * Closes the current reader, if there is one, and clears the reference to it. According to the
 * original note, closing the reader is expected to close the input stream being read from. An
 * {@link IOException} raised while closing is logged as a warning and not propagated; in that
 * case the reader reference is left in place.
 */
@Override
public void close() {
  if (reader == null) {
    // Nothing was opened, or the reader has already been closed.
    return;
  }
  try {
    reader.close();
    reader = null;
  } catch (IOException e) {
    logger.warn("Exception while closing stream.", e);
  }
}
/** * This method is responsible to implement logic for extracting header from text file Currently it * is assumed to be first line if headerExtractionEnabled is set to true TODO: enhance to support * more common header patterns * * @return field name strings */ private String[] extractHeader() throws SchemaChangeException, IOException, ExecutionSetupException { assert (settings.isHeaderExtractionEnabled()); assert (oContext != null); // don't skip header in case skipFirstLine is set true settings.setSkipFirstLine(false); // setup Output using OutputMutator // we should use a separate output mutator to avoid reshaping query output with header data HeaderOutputMutator hOutputMutator = new HeaderOutputMutator(); TextOutput hOutput = new RepeatedVarCharOutput(hOutputMutator, getColumns(), true); this.allocate(hOutputMutator.fieldVectorMap); // setup Input using InputStream // we should read file header irrespective of split given given to this reader InputStream hStream = dfs.openPossiblyCompressedStream(split.getPath()); TextInput hInput = new TextInput( settings, hStream, oContext.getManagedBuffer(READ_BUFFER), 0, split.getLength()); // setup Reader using Input and Output this.reader = new TextReader(settings, hInput, hOutput, oContext.getManagedBuffer(WHITE_SPACE_BUFFER)); reader.start(); // extract first row only reader.parseNext(); // grab the field names from output String[] fieldNames = ((RepeatedVarCharOutput) hOutput).getTextOutput(); // cleanup and set to skip the first line next time we read input reader.close(); hOutputMutator.close(); settings.setSkipFirstLine(true); return fieldNames; }
/**
 * Runs training in two sequential phases, each executed on its own fixed-size thread pool of
 * {@code NTHREADS} threads.
 *
 * Phase 1 submits one {@code FreRunnable} for every index in {@code [0, n)}, where {@code n} is
 * {@code txt_rdr.getSize()}. Phase 2 starts only after phase 1 has fully terminated and submits
 * one {@code DictRunnable} for every string returned by {@code f_rec.getRecordedStrings()}.
 *
 * The method returns only after both pools have terminated. Waiting blocks on
 * {@code awaitTermination} instead of spinning on {@code isTerminated()}, so the calling thread
 * does not burn CPU while the workers run.
 */
public void train() {
  int n = txt_rdr.getSize();
  assert (n == ans.size() && n != 0);

  // Phase 1: one FreRunnable per index.
  ExecutorService fre_executor = Executors.newFixedThreadPool(NTHREADS);
  for (int i = 0; i < n; i++) {
    fre_executor.execute(new FreRunnable(i));
  }
  fre_executor.shutdown();
  awaitCompletion(fre_executor);

  // Phase 2: one DictRunnable per string recorded so far.
  ExecutorService dict_executor = Executors.newFixedThreadPool(NTHREADS);
  for (String s : f_rec.getRecordedStrings()) {
    dict_executor.execute(new DictRunnable(s));
  }
  dict_executor.shutdown();
  awaitCompletion(dict_executor);
}

/**
 * Blocks until the given (already shut down) executor has terminated.
 *
 * An interrupt does not cut the wait short, which preserves the original contract that
 * {@code train()} returns only when all tasks are done; the interrupt flag is restored before
 * returning so callers can still observe it.
 *
 * @param executor executor on which {@code shutdown()} has already been called
 */
private void awaitCompletion(ExecutorService executor) {
  boolean interrupted = false;
  while (!executor.isTerminated()) {
    try {
      executor.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS);
    } catch (InterruptedException e) {
      // Remember the interrupt and keep waiting; the flag is re-asserted below.
      interrupted = true;
    }
  }
  if (interrupted) {
    Thread.currentThread().interrupt();
  }
}
/** * Performs the initial setup required for the record reader. Initializes the input stream, * handling of the output record batch and the actual reader to be used. * * @param context operator context from which buffer's will be allocated and managed * @param outputMutator Used to create the schema in the output record batch * @throws ExecutionSetupException */ @Override public void setup(OperatorContext context, OutputMutator outputMutator) throws ExecutionSetupException { oContext = context; readBuffer = context.getManagedBuffer(READ_BUFFER); whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER); // setup Output, Input, and Reader try { TextOutput output = null; TextInput input = null; InputStream stream = null; // setup Output using OutputMutator if (settings.isHeaderExtractionEnabled()) { // extract header and use that to setup a set of VarCharVectors String[] fieldNames = extractHeader(); output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery()); } else { // simply use RepeatedVarCharVector output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery()); } // setup Input using InputStream stream = dfs.openPossiblyCompressedStream(split.getPath()); input = new TextInput( settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength()); // setup Reader using Input and Output reader = new TextReader(settings, input, output, whitespaceBuffer); reader.start(); } catch (SchemaChangeException | IOException e) { throw new ExecutionSetupException( String.format("Failure while setting up text reader for file %s", split.getPath()), e); } catch (IllegalArgumentException e) { throw UserException.dataReadError(e) .addContext("File Path", split.getPath().toString()) .build(logger); } }
public void readTrainingData() { try { // readAnswer System.out.println("Accessing " + filenameA); FileReader fr = new FileReader(filenameA); BufferedReader br = new BufferedReader(fr); String tmp = br.readLine(); while (tmp != null) { if (tmp.trim().equals("P")) { ans.add(true); ans_pos += 1; } else { ans.add(false); ans_neg += 1; } tmp = br.readLine(); } br.close(); // readText txt_rdr.readText(filenameT); } catch (IOException e) { e.printStackTrace(); } }
/**
 * Verifies that {@code reset()} without a preceding {@code mark} rewinds to the start of the
 * text: one character is consumed, the reader is reset, and the next character read is expected
 * to be 'T'.
 */
@Test
public void testResetReturnsToBeginningOfTextWithoutMark() throws IOException {
  // Move off the first character so the reset has something to undo.
  _textReader.read();
  _textReader.reset();

  final char firstAfterReset = (char) _textReader.read();
  assertEquals("First Value After Reset Is T", 'T', firstAfterReset);
}
/**
 * Verifies that calling {@code ready()} on a reader that has already been closed raises an
 * {@link IOException}.
 */
@Test(expected = IOException.class)
public void testReadyThrowsExceptionIfClosed() throws IOException {
  // Close first; the subsequent ready() call is the one expected to throw.
  _textReader.close();

  _textReader.ready();
}
/** Verifies that the freshly prepared reader reports itself as ready. */
@Test
public void testReady() throws IOException {
  final boolean ready = _textReader.ready();

  assertTrue("CharSequenceReader is Ready!", ready);
}
/**
 * Verifies reading into a {@link CharBuffer}: a buffer wrapping a four-character array is
 * filled by the reader and the array is expected to spell "Test" afterwards.
 */
@Test
public void testReadToAppendable() throws IOException {
  final char[] chars = new char[4];
  // The buffer is a view over 'chars', so whatever is read lands directly in the array.
  final CharBuffer target = CharBuffer.wrap(chars);

  _textReader.read(target);

  assertEquals("TextBuilder Should Contain Test", "Test", new String(chars));
}
/** Verifies that the first single-character read returns 'T'. */
@Test
public void testRead() throws IOException {
  final char firstChar = (char) _textReader.read();

  assertEquals("Read Value is T", "T", String.valueOf(firstChar));
}
/**
 * Verifies that {@code mark(int)} rejects a negative argument with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testMarkWithNegativeArgumentThrowsException() throws IOException {
  final int negativeReadAheadLimit = -1;

  _textReader.mark(negativeReadAheadLimit);
}
/** Verifies that the reader reports support for {@code mark}/{@code reset}. */
@Test
public void testMarkSupported() {
  final boolean supported = _textReader.markSupported();

  assertTrue("Mark Is Supported", supported);
}