/**
 * Fills the next record batch from the underlying text reader.
 *
 * <p>Records are parsed one at a time until either {@code MAX_RECORDS_PER_BATCH} records have
 * been read or {@code reader.parseNext()} returns {@code false}; the batch is then finalized via
 * {@code reader.finishBatch()}.
 *
 * @return number of records in the batch
 * @throws UserException (data read error) wrapping any {@link IOException} or
 *     {@code TextParsingException} raised while parsing or finishing the batch; the file path
 *     and the reader position are attached as context
 */
@Override
public int next() {
  reader.resetForNextBatch();

  try {
    int recordCount = 0;
    // Stop at the batch limit, or as soon as the reader reports nothing further to parse.
    for (; recordCount < MAX_RECORDS_PER_BATCH; recordCount++) {
      if (!reader.parseNext()) {
        break;
      }
    }
    reader.finishBatch();
    return recordCount;
  } catch (IOException | TextParsingException e) {
    throw UserException.dataReadError(e)
        .addContext(
            "Failure while reading file %s. Happened at or shortly before byte position %d.",
            split.getPath(),
            reader.getPos())
        .build(logger);
  }
}
/** * This method is responsible to implement logic for extracting header from text file Currently it * is assumed to be first line if headerExtractionEnabled is set to true TODO: enhance to support * more common header patterns * * @return field name strings */ private String[] extractHeader() throws SchemaChangeException, IOException, ExecutionSetupException { assert (settings.isHeaderExtractionEnabled()); assert (oContext != null); // don't skip header in case skipFirstLine is set true settings.setSkipFirstLine(false); // setup Output using OutputMutator // we should use a separate output mutator to avoid reshaping query output with header data HeaderOutputMutator hOutputMutator = new HeaderOutputMutator(); TextOutput hOutput = new RepeatedVarCharOutput(hOutputMutator, getColumns(), true); this.allocate(hOutputMutator.fieldVectorMap); // setup Input using InputStream // we should read file header irrespective of split given given to this reader InputStream hStream = dfs.openPossiblyCompressedStream(split.getPath()); TextInput hInput = new TextInput( settings, hStream, oContext.getManagedBuffer(READ_BUFFER), 0, split.getLength()); // setup Reader using Input and Output this.reader = new TextReader(settings, hInput, hOutput, oContext.getManagedBuffer(WHITE_SPACE_BUFFER)); reader.start(); // extract first row only reader.parseNext(); // grab the field names from output String[] fieldNames = ((RepeatedVarCharOutput) hOutput).getTextOutput(); // cleanup and set to skip the first line next time we read input reader.close(); hOutputMutator.close(); settings.setSkipFirstLine(true); return fieldNames; }