/**
 * Reads the single split produced by the input format and expects rows r1, r2 and r3 in that
 * order, each exposing the NAME qualifier with the values "brian", "mark" and "dennis", after
 * which the reader must report that it is exhausted.
 *
 * <p>Assertions follow JUnit's (expected, actual) order so that failure messages are accurate.
 */
@Test
public void testGetOnlyName() throws Exception {
  FileInputFormat.addInputPath(conf, new Path("unused"));
  InputSplit[] splits = inputformat.getSplits(conf, 0);
  assertEquals(1, splits.length);
  RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);

  Text rowId = new Text("r1");
  AccumuloHiveRow row = new AccumuloHiveRow();
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("brian".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  rowId = new Text("r2");
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("mark".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  rowId = new Text("r3");
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("dennis".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  // no fourth row is expected
  assertFalse(reader.next(rowId, row));
}
/**
 * Drives the reader to the end of the input, tracking the reader position sampled before each
 * record. Whenever the position sampled before the previous record differs from the one sampled
 * before the current record, emits (previous docno, previous position TAB fileno) and bumps the
 * {@code Blocks.Total} counter.
 *
 * @param input reader supplying (docno, page) pairs
 * @param output collector receiving the block-start entries
 * @param reporter used for counter updates
 * @throws IOException if reading or collecting fails
 */
public void run(
    RecordReader<IntWritable, WikipediaPage> input,
    OutputCollector<IntWritable, Text> output,
    Reporter reporter)
    throws IOException {
  final IntWritable docKey = new IntWritable();
  final WikipediaPage page = new WikipediaPage();

  long previousPos = -1;
  int previousDocno = 0;
  long currentPos = input.getPos();

  while (input.next(docKey, page)) {
    // -1 means no record has been seen yet, so there is nothing to compare against
    boolean positionChanged = previousPos != -1 && previousPos != currentPos;
    if (positionChanged) {
      LOG.info(
          "- beginning of block at "
              + previousPos
              + ", docno:"
              + previousDocno
              + ", file:"
              + fileno);
      keyOut.set(previousDocno);
      valOut.set(previousPos + "\t" + fileno);
      output.collect(keyOut, valOut);
      reporter.incrCounter(Blocks.Total, 1);
    }

    previousPos = currentPos;
    currentPos = input.getPos();
    previousDocno = docKey.get();
  }
}
public void testStep0Mapper() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); JobConf job = new JobConf(); job.setNumMapTasks(numMaps); FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Step0OutputCollector collector = new Step0OutputCollector(numMaps); Reporter reporter = Reporter.NULL; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Step0Mapper mapper = new Step0Mapper(); mapper.configure(p); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); } mapper.map(key, value, collector, reporter); size++; } mapper.close(); // validate the mapper's output assertEquals(p, collector.keys[p]); assertEquals(firstKey.longValue(), collector.values[p].getFirstId()); assertEquals(size, collector.values[p].getSize()); } }
/**
 * Sets the column mapping to "cf:f1" and expects the reader over the single split to return no
 * rows at all.
 *
 * <p>The split-count assertion uses JUnit's (expected, actual) order so a failure is reported
 * correctly.
 */
@Test
public void testGetNone() throws Exception {
  FileInputFormat.addInputPath(conf, new Path("unused"));
  conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:f1");
  InputSplit[] splits = inputformat.getSplits(conf, 0);
  assertEquals(1, splits.length);
  RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
  Text rowId = new Text("r1");
  AccumuloHiveRow row = new AccumuloHiveRow();
  row.setRowId("r1");
  // the very first read must already report end of input
  assertFalse(reader.next(rowId, row));
}
public void testInputFormat() { try { JobConf conf = new JobConf(); String TMP_DIR = System.getProperty("test.build.data", "/tmp"); Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile"); SequenceFile.Writer sfw = SequenceFile.createWriter( FileSystem.getLocal(conf), conf, filename, ChukwaArchiveKey.class, ChunkImpl.class, SequenceFile.CompressionType.NONE, Reporter.NULL); StringBuilder buf = new StringBuilder(); int offsets[] = new int[lines.length]; for (int i = 0; i < lines.length; ++i) { buf.append(lines[i]); buf.append("\n"); offsets[i] = buf.length() - 1; } ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0); ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null); val.setRecordOffsets(offsets); sfw.append(key, val); sfw.append(key, val); // write it twice sfw.close(); long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen(); InputSplit split = new FileSplit(filename, 0, len, (String[]) null); ChukwaInputFormat in = new ChukwaInputFormat(); RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL); LongWritable l = r.createKey(); Text line = r.createValue(); for (int i = 0; i < lines.length * 2; ++i) { boolean succeeded = r.next(l, line); assertTrue(succeeded); assertEquals(i, l.get()); assertEquals(lines[i % lines.length], line.toString()); System.out.println("read line: " + l.get() + " " + line); } boolean succeeded = r.next(l, line); assertFalse(succeeded); } catch (IOException e) { e.printStackTrace(); fail("IO exception " + e); } }
public void run( RecordReader<Text, ArcFileItem> input, OutputCollector<Text, CrawlURLMetadata> output, Reporter reporter) throws IOException { int lastValidPos = 0; try { // allocate key & value instances that are re-used for all entries Text key = input.createKey(); ArcFileItem value = input.createValue(); while (input.next(key, value)) { lastValidPos = value.getArcFilePos(); // map pair to output map(key, value, output, reporter); } } catch (IOException e) { String errorMessage = "Exception processing Split:" + _splitDetails + " Exception:" + StringUtils.stringifyException(e); LOG.error(errorMessage); if (_attemptID.getId() == 0 || (lastValidPos == 0 && _attemptID.getId() != _maxAttemptsPerTask - 1)) { throw new IOException(errorMessage); } // and just ignore the message } catch (Throwable e) { String errorMessage = "Unknown Exception processing Split:" + _splitDetails + " Exception:" + StringUtils.stringifyException(e); LOG.error(errorMessage); // if attempt number is not max attempt number configured... if (_attemptID.getId() != _maxAttemptsPerTask - 1) { // then bubble up exception throw new IOException(errorMessage); } } finally { close(); } }
/**
 * Writes {@code writeCount} copies of the fixture row to an RCFile, asks the input format for
 * {@code splitNumber} splits and verifies both the number of splits and that reading all splits
 * yields exactly {@code writeCount} records.
 *
 * @param intervalRecordCount value stored under {@code RCFile.RECORD_INTERVAL_CONF_STR}
 * @param writeCount number of rows to append
 * @param splitNumber number of splits requested and expected
 * @param minSplitSize value stored under {@code mapred.min.split.size}
 * @param codec compression codec handed to the writer
 * @throws IOException if writing or reading the file fails
 */
private void writeThenReadByRecordReader(
    int intervalRecordCount,
    int writeCount,
    int splitNumber,
    long minSplitSize,
    CompressionCodec codec)
    throws IOException {
  Path testDir =
      new Path(System.getProperty("test.data.dir", ".") + "/mapred/testsmallfirstsplit");
  Path testFile = new Path(testDir, "test_rcfile");
  fs.delete(testFile, true);
  Configuration cloneConf = new Configuration(conf);
  RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
  cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount);
  RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);

  BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
  for (int i = 0; i < bytesArray.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
    bytes.set(i, cu);
  }
  for (int i = 0; i < writeCount; i++) {
    if (i == intervalRecordCount) {
      System.out.println("write position:" + writer.getLength());
    }
    writer.append(bytes);
  }
  writer.close();

  RCFileInputFormat inputFormat = new RCFileInputFormat();
  JobConf jobConf = new JobConf(cloneConf);
  jobConf.set("mapred.input.dir", testDir.toString());
  jobConf.setLong("mapred.min.split.size", minSplitSize);
  InputSplit[] splits = inputFormat.getSplits(jobConf, splitNumber);
  // expected value first, so a failure reports the right numbers
  assertEquals("splits length should be " + splitNumber, splitNumber, splits.length);

  int readCount = 0;
  for (int i = 0; i < splits.length; i++) {
    int previousReadCount = readCount;
    RecordReader rr = inputFormat.getRecordReader(splits[i], jobConf, Reporter.NULL);
    try {
      Object key = rr.createKey();
      Object value = rr.createValue();
      while (rr.next(key, value)) {
        readCount++;
      }
    } finally {
      // release the reader even when reading fails
      rr.close();
    }
    System.out.println("The " + i + "th split read " + (readCount - previousReadCount));
  }
  assertEquals("readCount should be equal to writeCount", writeCount, readCount);
}
/**
 * Reads all messages of the given input split.
 *
 * @param format input format used to open a record reader on the split
 * @param split the split to read
 * @param job job configuration passed to the input format
 * @param reporter reporter passed to the input format
 * @return the messages read, in the order the reader returned them
 * @throws IOException if opening, reading or closing the reader fails
 */
private List<Message> readSplit(
    DatabusInputFormat format, InputSplit split, JobConf job, Reporter reporter)
    throws IOException {
  List<Message> result = new ArrayList<Message>();
  RecordReader<LongWritable, Message> reader = format.getRecordReader(split, job, reporter);
  try {
    LongWritable key = reader.createKey();
    Message value = reader.createValue();
    while (reader.next(key, value)) {
      result.add(value);
      // use a fresh value object per record so stored messages are not overwritten
      value = reader.createValue();
    }
  } finally {
    // close the reader even when a read fails
    reader.close();
  }
  return result;
}
/**
 * Refreshes {@code completedBytes} from the record reader's progress. The value never decreases
 * and never exceeds {@code totalBytes}. A failure to obtain the progress leaves the previous
 * value untouched.
 */
private void updateCompletedBytes() {
  try {
    long estimatedBytes = (long) (totalBytes * recordReader.getProgress());
    long neverDecreasing = max(completedBytes, estimatedBytes);
    completedBytes = min(totalBytes, neverDecreasing);
  } catch (IOException ignored) {
    // progress reporting is best effort; keep the last known value
  }
}
/**
 * Wraps the record reader {@code rr} and assigns it position {@code id} in the collector.
 * Creates the head key/value from the reader, picks the comparator (the one registered for the
 * key class when {@code cmpcl} is null, otherwise a new instance of {@code cmpcl}) and reads the
 * first pair.
 *
 * @throws IOException if the comparator cannot be instantiated or the first read fails
 */
WrappedRecordReader(int id, RecordReader<K, U> rr, Class<? extends WritableComparator> cmpcl)
    throws IOException {
  this.id = id;
  this.rr = rr;
  khead = rr.createKey();
  vhead = rr.createValue();
  try {
    if (cmpcl == null) {
      cmp = WritableComparator.get(khead.getClass());
    } else {
      cmp = cmpcl.newInstance();
    }
  } catch (InstantiationException instantiationFailure) {
    IOException wrapped = new IOException();
    wrapped.initCause(instantiationFailure);
    throw wrapped;
  } catch (IllegalAccessException accessFailure) {
    IOException wrapped = new IOException();
    wrapped.initCause(accessFailure);
    throw wrapped;
  }
  vjoin = new StreamBackedIterator<U>();
  // prime the head key/value with the first pair
  next();
}
/**
 * Pre-populates an {@code AccumuloHiveRow} with the name, sid, degrees and millis cells, reads
 * the first record from the single split into it, and checks that the row id matches and that
 * all four cells hold the expected bytes.
 *
 * <p>All assertions use JUnit's (expected, actual) order.
 */
@Test
public void testHiveAccumuloRecord() throws Exception {
  FileInputFormat.addInputPath(conf, new Path("unused"));
  InputSplit[] splits = inputformat.getSplits(conf, 0);
  assertEquals(1, splits.length);
  RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
  Text rowId = new Text("r1");
  AccumuloHiveRow row = new AccumuloHiveRow();
  row.add(COLUMN_FAMILY.toString(), NAME.toString(), "brian".getBytes());
  row.add(COLUMN_FAMILY.toString(), SID.toString(), parseIntBytes("1"));
  row.add(COLUMN_FAMILY.toString(), DEGREES.toString(), parseDoubleBytes("44.5"));
  row.add(COLUMN_FAMILY.toString(), MILLIS.toString(), parseLongBytes("555"));

  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("brian".getBytes(), row.getValue(COLUMN_FAMILY, NAME));
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, SID));
  assertArrayEquals(parseIntBytes("1"), row.getValue(COLUMN_FAMILY, SID));
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, DEGREES));
  assertArrayEquals(parseDoubleBytes("44.5"), row.getValue(COLUMN_FAMILY, DEGREES));
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, MILLIS));
  assertArrayEquals(parseLongBytes("555"), row.getValue(COLUMN_FAMILY, MILLIS));
}
/** * Use the input splits to take samples of the input and generate sample keys. By default reads * 100,000 keys from 10 locations in the input, sorts them and picks N-1 keys to generate N * equally sized partitions. * * @param conf the job to sample * @param partFile where to write the output file to * @throws IOException if something goes wrong */ public static void writePartitionFile(JobConf conf, Path partFile) throws IOException { TeraInputFormat inFormat = new TeraInputFormat(); TextSampler sampler = new TextSampler(); Text key = new Text(); Text value = new Text(); int partitions = conf.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 100000); InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks()); int samples = Math.min(10, splits.length); long recordsPerSample = sampleSize / samples; int sampleStep = splits.length / samples; long records = 0; // take N samples from different parts of the input for (int i = 0; i < samples; ++i) { RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null); while (reader.next(key, value)) { sampler.addKey(key); records += 1; if ((i + 1) * recordsPerSample <= records) { break; } } } FileSystem outFs = partFile.getFileSystem(conf); if (outFs.exists(partFile)) { outFs.delete(partFile, false); } SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class, NullWritable.class); NullWritable nullValue = NullWritable.get(); for (Text split : sampler.createPartitions(partitions)) { writer.append(split, nullValue); } writer.close(); }
@Override public void close() { // some hive input formats are broken and bad things can happen if you close them multiple times if (closed) { return; } closed = true; updateCompletedBytes(); try { recordReader.close(); } catch (IOException e) { throw Throwables.propagate(e); } }
@Override public void setFile(String file, long offset, long length) { JobConf defaultConf = new JobConf(); this.split = new FileSplit(new Path(file), offset, length, defaultConf); this.jobConf = defaultConf; // this.split = split; this.input_format = new TextInputFormat(); try { this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } this.key = reader.createKey(); }
/**
 * Adds the "foo" authorization to the test user, writes row r4 whose cells all carry the "foo"
 * visibility, configures the scan with the "foo" authorization and expects rows r1 to r4 in
 * order (names "brian", "mark", "dennis", "frank"), followed by end of input.
 *
 * <p>Assertions follow JUnit's (expected, actual) order so that failure messages are accurate.
 */
@Test
public void testGetProtectedField() throws Exception {
  FileInputFormat.addInputPath(conf, new Path("unused"));

  BatchWriterConfig writerConf = new BatchWriterConfig();
  BatchWriter writer = con.createBatchWriter(TEST_TABLE, writerConf);

  Authorizations origAuths = con.securityOperations().getUserAuthorizations(USER);
  con.securityOperations()
      .changeUserAuthorizations(USER, new Authorizations(origAuths.toString() + ",foo"));

  Mutation m = new Mutation("r4");
  m.put(COLUMN_FAMILY, NAME, new ColumnVisibility("foo"), new Value("frank".getBytes()));
  m.put(COLUMN_FAMILY, SID, new ColumnVisibility("foo"), new Value(parseIntBytes("4")));
  m.put(COLUMN_FAMILY, DEGREES, new ColumnVisibility("foo"), new Value(parseDoubleBytes("60.6")));
  m.put(COLUMN_FAMILY, MILLIS, new ColumnVisibility("foo"), new Value(parseLongBytes("777")));
  writer.addMutation(m);
  writer.close();

  conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo");

  InputSplit[] splits = inputformat.getSplits(conf, 0);
  assertEquals(1, splits.length);
  RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);

  Text rowId = new Text("r1");
  AccumuloHiveRow row = new AccumuloHiveRow();
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("brian".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  rowId = new Text("r2");
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("mark".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  rowId = new Text("r3");
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("dennis".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  // the row written above with the "foo" visibility
  rowId = new Text("r4");
  assertTrue(reader.next(rowId, row));
  assertEquals(rowId.toString(), row.getRowId());
  assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
  assertArrayEquals("frank".getBytes(), row.getValue(COLUMN_FAMILY, NAME));

  assertFalse(reader.next(rowId, row));
}
@Override public boolean advanceNextPosition() { try { if (closed || !recordReader.next(key, value)) { close(); return false; } // reset loaded flags // partition keys are already loaded, but everything else is not System.arraycopy(isPartitionColumn, 0, loaded, 0, isPartitionColumn.length); // decode value rowData = deserializer.deserialize(value); return true; } catch (IOException | SerDeException | RuntimeException e) { closeWithSuppression(e); throw new PrestoException(HIVE_CURSOR_ERROR.toErrorCode(), e); } }
@Override public void initialize(InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException { org.apache.hadoop.mapred.InputSplit oldSplit; if (split.getClass() == FileSplit.class) { oldSplit = new org.apache.hadoop.mapred.FileSplit( ((FileSplit) split).getPath(), ((FileSplit) split).getStart(), ((FileSplit) split).getLength(), split.getLocations()); } else { oldSplit = ((InputSplitWrapper) split).realSplit; } @SuppressWarnings("unchecked") Reporter reporter = new Reporter() { // Reporter interface over ctx final TaskInputOutputContext ioCtx = context instanceof TaskInputOutputContext ? (TaskInputOutputContext) context : null; public void progress() { HadoopCompat.progress(context); } // @Override public float getProgress() { return (ioCtx != null) ? ioCtx.getProgress() : 0; } public void setStatus(String status) { if (ioCtx != null) HadoopCompat.setStatus(ioCtx, status); } public void incrCounter(String group, String counter, long amount) { if (ioCtx != null) HadoopCompat.incrementCounter(ioCtx.getCounter(group, counter), amount); } @SuppressWarnings("unchecked") public void incrCounter(Enum<?> key, long amount) { if (ioCtx != null) HadoopCompat.incrementCounter(ioCtx.getCounter(key), amount); } public org.apache.hadoop.mapred.InputSplit getInputSplit() throws UnsupportedOperationException { throw new UnsupportedOperationException(); } public Counter getCounter(String group, String name) { return ioCtx != null ? (Counter) HadoopCompat.getCounter(ioCtx, group, name) : null; } @SuppressWarnings("unchecked") public Counter getCounter(Enum<?> name) { return ioCtx != null ? (Counter) ioCtx.getCounter(name) : null; } }; realReader = realInputFormat.getRecordReader( oldSplit, (JobConf) HadoopCompat.getConfiguration(context), reporter); keyObj = realReader.createKey(); valueObj = realReader.createValue(); }
/**
 * Returns the progress reported by the wrapped {@code realReader}.
 *
 * @return the value of {@code realReader.getProgress()}, unchanged
 * @throws IOException if the wrapped reader fails to report its progress
 */
@Override public float getProgress() throws IOException { return realReader.getProgress(); }
/**
 * Closes the wrapped {@code realReader}.
 *
 * @throws IOException if closing the wrapped reader fails
 */
@Override public void close() throws IOException { realReader.close(); }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, DateTimeZone sessionTimeZone) { checkNotNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); checkNotNull(splitSchema, "splitSchema is null"); checkNotNull(partitionKeys, "partitionKeys is null"); checkNotNull(columns, "columns is null"); checkArgument(!columns.isEmpty(), "columns is empty"); checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); checkNotNull(sessionTimeZone, "sessionTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.sessionTimeZone = sessionTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = column.getType(); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> 
partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey.nameGetter()); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8); Type type = types[columnIndex]; if (BOOLEAN.equals(type)) { if (isTrue(bytes, 0, bytes.length)) { booleans[columnIndex] = true; } else if (isFalse(bytes, 0, bytes.length)) { booleans[columnIndex] = false; } else { String valueString = new String(bytes, Charsets.UTF_8); throw new IllegalArgumentException( String.format( "Invalid partition value '%s' for BOOLEAN partition key %s", valueString, names[columnIndex])); } } else if (BIGINT.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for BIGINT partition key %s", names[columnIndex])); } longs[columnIndex] = parseLong(bytes, 0, bytes.length); } else if (DOUBLE.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for DOUBLE partition key %s", names[columnIndex])); } doubles[columnIndex] = parseDouble(bytes, 0, bytes.length); } else if (VARCHAR.equals(type)) { slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length)); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } } } }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) { requireNonNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); requireNonNull(splitSchema, "splitSchema is null"); requireNonNull(partitionKeys, "partitionKeys is null"); requireNonNull(columns, "columns is null"); requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.objects = new Object[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = typeManager.getType(column.getTypeSignature()); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName); for (int columnIndex = 0; 
columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(UTF_8); String name = names[columnIndex]; Type type = types[columnIndex]; if (HiveUtil.isHiveNull(bytes)) { nulls[columnIndex] = true; } else if (BOOLEAN.equals(type)) { booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name); } else if (BIGINT.equals(type)) { longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name); } else if (INTEGER.equals(type)) { longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name); } else if (SMALLINT.equals(type)) { longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name); } else if (TINYINT.equals(type)) { longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name); } else if (DOUBLE.equals(type)) { doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name); } else if (isVarcharType(type)) { slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type); } else if (DATE.equals(type)) { longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name); } else if (TIMESTAMP.equals(type)) { longs[columnIndex] = timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name); } else if (isShortDecimal(type)) { longs[columnIndex] = shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else if (isLongDecimal(type)) { slices[columnIndex] = longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else { throw new PrestoException( NOT_SUPPORTED, format( "Unsupported column type %s for partition key: %s", type.getDisplayName(), name)); } } } }
/**
 * Reads the next pair from the wrapped record reader, unless {@code ExecMapper.getDone()}
 * reports true, in which case the reader is not touched.
 *
 * @param key object to read the key into
 * @param value object to read the value into
 * @return {@code false} when {@code ExecMapper.getDone()} is true, otherwise the result of the
 *     wrapped reader's {@code next}
 * @throws IOException if the wrapped reader fails
 */
public boolean next(K key, V value) throws IOException {
  return !ExecMapper.getDone() && recordReader.next(key, value);
}
/**
 * Returns the progress reported by the wrapped {@code recordReader}.
 *
 * @return the value of {@code recordReader.getProgress()}, unchanged
 * @throws IOException if the wrapped reader fails to report its progress
 */
public float getProgress() throws IOException { return recordReader.getProgress(); }
/**
 * Returns the position reported by the wrapped {@code recordReader}.
 *
 * @return the value of {@code recordReader.getPos()}, unchanged
 * @throws IOException if the wrapped reader fails to report its position
 */
public long getPos() throws IOException { return recordReader.getPos(); }
/**
 * Creates a value object by delegating to the wrapped {@code recordReader}.
 *
 * <p>NOTE(review): the result is cast to {@code V}; this is presumably an unchecked cast that
 * is not verified at this point. Confirm against the declared type of {@code recordReader}.
 *
 * @return the object returned by {@code recordReader.createValue()}
 */
public V createValue() { return (V) recordReader.createValue(); }
/**
 * Creates a key object by delegating to the wrapped {@code recordReader}.
 *
 * <p>NOTE(review): the result is cast to {@code K}; this is presumably an unchecked cast that
 * is not verified at this point. Confirm against the declared type of {@code recordReader}.
 *
 * @return the object returned by {@code recordReader.createKey()}
 */
public K createKey() { return (K) recordReader.createKey(); }
/**
 * Forwards the close request to the proxied record reader {@code rr}.
 *
 * @throws IOException if closing the proxied reader fails
 */
public void close() throws IOException { rr.close(); }
/**
 * Advances the wrapped {@code realReader}, reading the next pair into {@code keyObj} and
 * {@code valueObj}.
 *
 * @return the result of {@code realReader.next(keyObj, valueObj)}
 * @throws IOException if the wrapped reader fails
 * @throws InterruptedException declared by the signature; the body shown here does not throw it
 */
@Override public boolean nextKeyValue() throws IOException, InterruptedException { return realReader.next(keyObj, valueObj); }
/**
 * Closes the wrapped {@code recordReader}.
 *
 * @throws IOException if closing the wrapped reader fails
 */
public void close() throws IOException { recordReader.close(); }
/**
 * Reads the next k,v pair from {@code rr} into the head of this object ({@code khead},
 * {@code vhead}) and sets {@code empty} to true when the reader returned no pair.
 *
 * <p>NOTE(review): the previous comment said this returns true iff the RR and this are
 * exhausted, but the method returns {@code hasNext()}. {@code hasNext()} is not visible here;
 * it presumably reports {@code !empty}, i.e. true when a pair WAS read. Confirm.
 *
 * @return the result of {@code hasNext()} after the read
 * @throws IOException if the underlying reader fails
 */
protected boolean next() throws IOException { empty = !rr.next(khead, vhead); return hasNext(); }