@Override public void configure(JobConf job) { super.configure(job); try { _partFileWithHeader = TfUtils.isPartFileWithHeader(job); tfmapper = new TfUtils(job); tfmapper.loadTfMetadata(job, true); // Load relevant information for CSV Reblock ByteWritable key = new ByteWritable(); OffsetCount value = new OffsetCount(); Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME)); FileSystem fs = FileSystem.get(job); Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs); String thisfile = thisPath.toString(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job); while (reader.next(key, value)) { // "key" needn't be checked since the offset file has information about a single CSV input // (the raw data file) if (thisfile.equals(value.filename)) offsetMap.put(value.fileOffset, value.count); } reader.close(); idxRow = new CSVReblockMapper.IndexedBlockRow(); int maxBclen = 0; for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions) for (CSVReblockInstruction in : insv) { if (maxBclen < in.bclen) maxBclen = in.bclen; } // always dense since common csv usecase idxRow.getRow().data.reset(1, maxBclen, false); } catch (IOException e) { throw new RuntimeException(e); } catch (JSONException e) { throw new RuntimeException(e); } }