Java RecordReader 예제들, org.apache.hadoop.mapred.RecordReader Java 예제들

예제 #1

0

파일 보기

파일: TestHiveAccumuloTableInputFormat.java 프로젝트: Leolh/hive

  @Test
  public void testGetOnlyName() throws Exception {
    FileInputFormat.addInputPath(conf, new Path("unused"));

    InputSplit[] splits = inputformat.getSplits(conf, 0);
    assertEquals(splits.length, 1);
    RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
    Text rowId = new Text("r1");
    AccumuloHiveRow row = new AccumuloHiveRow();
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "brian".getBytes());

    rowId = new Text("r2");
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "mark".getBytes());

    rowId = new Text("r3");
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "dennis".getBytes());

    assertFalse(reader.next(rowId, row));
  }

예제 #2

0

파일 보기

파일: BuildWikipediaForwardIndex.java 프로젝트: sdiao/Cloud9

    public void run(
        RecordReader<IntWritable, WikipediaPage> input,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      IntWritable key = new IntWritable();
      WikipediaPage value = new WikipediaPage();

      long pos = -1;
      long prevPos = -1;

      int prevDocno = 0;

      pos = input.getPos();
      while (input.next(key, value)) {
        if (prevPos != -1 && prevPos != pos) {
          LOG.info(
              "- beginning of block at " + prevPos + ", docno:" + prevDocno + ", file:" + fileno);
          keyOut.set(prevDocno);
          valOut.set(prevPos + "\t" + fileno);
          output.collect(keyOut, valOut);
          reporter.incrCounter(Blocks.Total, 1);
        }

        prevPos = pos;
        pos = input.getPos();
        prevDocno = key.get();
      }
    }

예제 #3

0

파일 보기

파일: Step0JobTest.java 프로젝트: maximzhao/Mahout-GSOC-LibLinear

  public void testStep0Mapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);
    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);

    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

      LongWritable key = reader.createKey();
      Text value = reader.createValue();

      Step0Mapper mapper = new Step0Mapper();
      mapper.configure(p);

      Long firstKey = null;
      int size = 0;

      while (reader.next(key, value)) {
        if (firstKey == null) {
          firstKey = key.get();
        }

        mapper.map(key, value, collector, reporter);

        size++;
      }

      mapper.close();

      // validate the mapper's output
      assertEquals(p, collector.keys[p]);
      assertEquals(firstKey.longValue(), collector.values[p].getFirstId());
      assertEquals(size, collector.values[p].getSize());
    }
  }

예제 #4

0

파일 보기

파일: TestHiveAccumuloTableInputFormat.java 프로젝트: Leolh/hive

 @Test
 public void testGetNone() throws Exception {
   FileInputFormat.addInputPath(conf, new Path("unused"));
   conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:f1");
   InputSplit[] splits = inputformat.getSplits(conf, 0);
   assertEquals(splits.length, 1);
   RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
   Text rowId = new Text("r1");
   AccumuloHiveRow row = new AccumuloHiveRow();
   row.setRowId("r1");
   assertFalse(reader.next(rowId, row));
 }

예제 #5

0

파일 보기

파일: TestInputFormat.java 프로젝트: MarvinLiu0810/HiTune

  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }

예제 #6

0

파일 보기

파일: S3GetMetdataJob.java 프로젝트: arefugee/commoncrawl-crawler

  public void run(
      RecordReader<Text, ArcFileItem> input,
      OutputCollector<Text, CrawlURLMetadata> output,
      Reporter reporter)
      throws IOException {

    int lastValidPos = 0;
    try {
      // allocate key & value instances that are re-used for all entries
      Text key = input.createKey();
      ArcFileItem value = input.createValue();

      while (input.next(key, value)) {

        lastValidPos = value.getArcFilePos();

        // map pair to output
        map(key, value, output, reporter);
      }
    } catch (IOException e) {

      String errorMessage =
          "Exception processing Split:"
              + _splitDetails
              + " Exception:"
              + StringUtils.stringifyException(e);
      LOG.error(errorMessage);

      if (_attemptID.getId() == 0
          || (lastValidPos == 0 && _attemptID.getId() != _maxAttemptsPerTask - 1)) {
        throw new IOException(errorMessage);
      }

      // and just ignore the message
    } catch (Throwable e) {
      String errorMessage =
          "Unknown Exception processing Split:"
              + _splitDetails
              + " Exception:"
              + StringUtils.stringifyException(e);
      LOG.error(errorMessage);
      // if attempt number is not max attempt number configured...
      if (_attemptID.getId() != _maxAttemptsPerTask - 1) {
        // then bubble up exception
        throw new IOException(errorMessage);
      }

    } finally {
      close();
    }
  }

예제 #7

0

파일 보기

  private void writeThenReadByRecordReader(
      int intervalRecordCount,
      int writeCount,
      int splitNumber,
      long minSplitSize,
      CompressionCodec codec)
      throws IOException {
    Path testDir =
        new Path(System.getProperty("test.data.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount);

    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
      BytesRefWritable cu = null;
      cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
      bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
      if (i == intervalRecordCount) {
        System.out.println("write position:" + writer.getLength());
      }
      writer.append(bytes);
    }
    writer.close();

    RCFileInputFormat inputFormat = new RCFileInputFormat();
    JobConf jonconf = new JobConf(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    jonconf.setLong("mapred.min.split.size", minSplitSize);
    InputSplit[] splits = inputFormat.getSplits(jonconf, splitNumber);
    assertEquals("splits length should be " + splitNumber, splits.length, splitNumber);
    int readCount = 0;
    for (int i = 0; i < splits.length; i++) {
      int previousReadCount = readCount;
      RecordReader rr = inputFormat.getRecordReader(splits[i], jonconf, Reporter.NULL);
      Object key = rr.createKey();
      Object value = rr.createValue();
      while (rr.next(key, value)) {
        readCount++;
      }
      System.out.println("The " + i + "th split read " + (readCount - previousReadCount));
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
  }

예제 #8

0

파일 보기

파일: TestDatabusInputFormat.java 프로젝트: sunilkalva/pintail

  /**
   * read the the given input split.
   *
   * @return List : List of read messages
   */
  private List<Message> readSplit(
      DatabusInputFormat format, InputSplit split, JobConf job, Reporter reporter)
      throws IOException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format.getRecordReader(split, job, reporter);
    LongWritable key = ((DatabusRecordReader) reader).createKey();
    Message value = ((DatabusRecordReader) reader).createValue();

    while (((DatabusRecordReader) reader).next(key, value)) {
      result.add(value);
      value = (Message) ((DatabusRecordReader) reader).createValue();
    }
    reader.close();
    return result;
  }

예제 #9

0

파일 보기

파일: GenericHiveRecordCursor.java 프로젝트: ZheYuan/presto

 private void updateCompletedBytes() {
   try {
     long newCompletedBytes = (long) (totalBytes * recordReader.getProgress());
     completedBytes = min(totalBytes, max(completedBytes, newCompletedBytes));
   } catch (IOException ignored) {
   }
 }

예제 #10

0

파일 보기

파일: WrappedRecordReader.java 프로젝트: ukulililixl/core

 /** For a given RecordReader rr, occupy position id in collector. */
 WrappedRecordReader(int id, RecordReader<K, U> rr, Class<? extends WritableComparator> cmpcl)
     throws IOException {
   this.id = id;
   this.rr = rr;
   khead = rr.createKey();
   vhead = rr.createValue();
   try {
     cmp = (null == cmpcl) ? WritableComparator.get(khead.getClass()) : cmpcl.newInstance();
   } catch (InstantiationException e) {
     throw (IOException) new IOException().initCause(e);
   } catch (IllegalAccessException e) {
     throw (IOException) new IOException().initCause(e);
   }
   vjoin = new StreamBackedIterator<U>();
   next();
 }

예제 #11

0

파일 보기

파일: TestHiveAccumuloTableInputFormat.java 프로젝트: Leolh/hive

 @Test
 public void testHiveAccumuloRecord() throws Exception {
   FileInputFormat.addInputPath(conf, new Path("unused"));
   InputSplit[] splits = inputformat.getSplits(conf, 0);
   assertEquals(splits.length, 1);
   RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
   Text rowId = new Text("r1");
   AccumuloHiveRow row = new AccumuloHiveRow();
   row.add(COLUMN_FAMILY.toString(), NAME.toString(), "brian".getBytes());
   row.add(COLUMN_FAMILY.toString(), SID.toString(), parseIntBytes("1"));
   row.add(COLUMN_FAMILY.toString(), DEGREES.toString(), parseDoubleBytes("44.5"));
   row.add(COLUMN_FAMILY.toString(), MILLIS.toString(), parseLongBytes("555"));
   assertTrue(reader.next(rowId, row));
   assertEquals(rowId.toString(), row.getRowId());
   assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
   assertArrayEquals("brian".getBytes(), row.getValue(COLUMN_FAMILY, NAME));
   assertTrue(row.hasFamAndQual(COLUMN_FAMILY, SID));
   assertArrayEquals(parseIntBytes("1"), row.getValue(COLUMN_FAMILY, SID));
   assertTrue(row.hasFamAndQual(COLUMN_FAMILY, DEGREES));
   assertArrayEquals(parseDoubleBytes("44.5"), row.getValue(COLUMN_FAMILY, DEGREES));
   assertTrue(row.hasFamAndQual(COLUMN_FAMILY, MILLIS));
   assertArrayEquals(parseLongBytes("555"), row.getValue(COLUMN_FAMILY, MILLIS));
 }

예제 #12

0

파일 보기

파일: TeraInputFormat.java 프로젝트: ifloating/hadoop-source-reading

 /**
  * Use the input splits to take samples of the input and generate sample keys. By default reads
  * 100,000 keys from 10 locations in the input, sorts them and picks N-1 keys to generate N
  * equally sized partitions.
  *
  * @param conf the job to sample
  * @param partFile where to write the output file to
  * @throws IOException if something goes wrong
  */
 public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
   TeraInputFormat inFormat = new TeraInputFormat();
   TextSampler sampler = new TextSampler();
   Text key = new Text();
   Text value = new Text();
   int partitions = conf.getNumReduceTasks();
   long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
   InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
   int samples = Math.min(10, splits.length);
   long recordsPerSample = sampleSize / samples;
   int sampleStep = splits.length / samples;
   long records = 0;
   // take N samples from different parts of the input
   for (int i = 0; i < samples; ++i) {
     RecordReader<Text, Text> reader =
         inFormat.getRecordReader(splits[sampleStep * i], conf, null);
     while (reader.next(key, value)) {
       sampler.addKey(key);
       records += 1;
       if ((i + 1) * recordsPerSample <= records) {
         break;
       }
     }
   }
   FileSystem outFs = partFile.getFileSystem(conf);
   if (outFs.exists(partFile)) {
     outFs.delete(partFile, false);
   }
   SequenceFile.Writer writer =
       SequenceFile.createWriter(outFs, conf, partFile, Text.class, NullWritable.class);
   NullWritable nullValue = NullWritable.get();
   for (Text split : sampler.createPartitions(partitions)) {
     writer.append(split, nullValue);
   }
   writer.close();
 }

예제 #13

0

파일 보기

파일: GenericHiveRecordCursor.java 프로젝트: ZheYuan/presto

  @Override
  public void close() {
    // some hive input formats are broken and bad things can happen if you close them multiple times
    if (closed) {
      return;
    }
    closed = true;

    updateCompletedBytes();

    try {
      recordReader.close();
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

예제 #14

0

파일 보기

파일: TextRecordParser.java 프로젝트: brockn/IterativeReduce

  @Override
  public void setFile(String file, long offset, long length) {
    JobConf defaultConf = new JobConf();
    this.split = new FileSplit(new Path(file), offset, length, defaultConf);

    this.jobConf = defaultConf;
    // this.split = split;
    this.input_format = new TextInputFormat();

    try {
      this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    this.key = reader.createKey();
  }

예제 #15

0

파일 보기

파일: TestHiveAccumuloTableInputFormat.java 프로젝트: Leolh/hive

  @Test
  public void testGetProtectedField() throws Exception {
    FileInputFormat.addInputPath(conf, new Path("unused"));

    BatchWriterConfig writerConf = new BatchWriterConfig();
    BatchWriter writer = con.createBatchWriter(TEST_TABLE, writerConf);

    Authorizations origAuths = con.securityOperations().getUserAuthorizations(USER);
    con.securityOperations()
        .changeUserAuthorizations(USER, new Authorizations(origAuths.toString() + ",foo"));

    Mutation m = new Mutation("r4");
    m.put(COLUMN_FAMILY, NAME, new ColumnVisibility("foo"), new Value("frank".getBytes()));
    m.put(COLUMN_FAMILY, SID, new ColumnVisibility("foo"), new Value(parseIntBytes("4")));
    m.put(COLUMN_FAMILY, DEGREES, new ColumnVisibility("foo"), new Value(parseDoubleBytes("60.6")));
    m.put(COLUMN_FAMILY, MILLIS, new ColumnVisibility("foo"), new Value(parseLongBytes("777")));

    writer.addMutation(m);
    writer.close();

    conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo");

    InputSplit[] splits = inputformat.getSplits(conf, 0);
    assertEquals(splits.length, 1);
    RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
    Text rowId = new Text("r1");
    AccumuloHiveRow row = new AccumuloHiveRow();
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "brian".getBytes());

    rowId = new Text("r2");
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "mark".getBytes());

    rowId = new Text("r3");
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "dennis".getBytes());

    rowId = new Text("r4");
    assertTrue(reader.next(rowId, row));
    assertEquals(row.getRowId(), rowId.toString());
    assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME));
    assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "frank".getBytes());

    assertFalse(reader.next(rowId, row));
  }

예제 #16

0

파일 보기

파일: GenericHiveRecordCursor.java 프로젝트: ZheYuan/presto

  @Override
  public boolean advanceNextPosition() {
    try {
      if (closed || !recordReader.next(key, value)) {
        close();
        return false;
      }

      // reset loaded flags
      // partition keys are already loaded, but everything else is not
      System.arraycopy(isPartitionColumn, 0, loaded, 0, isPartitionColumn.length);

      // decode value
      rowData = deserializer.deserialize(value);

      return true;
    } catch (IOException | SerDeException | RuntimeException e) {
      closeWithSuppression(e);
      throw new PrestoException(HIVE_CURSOR_ERROR.toErrorCode(), e);
    }
  }

예제 #17

0

파일 보기

파일: MapReduceInputFormatWrapper.java 프로젝트: HimanshuBhardwaj/elephant-bird

    @Override
    public void initialize(InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {

      org.apache.hadoop.mapred.InputSplit oldSplit;

      if (split.getClass() == FileSplit.class) {
        oldSplit =
            new org.apache.hadoop.mapred.FileSplit(
                ((FileSplit) split).getPath(),
                ((FileSplit) split).getStart(),
                ((FileSplit) split).getLength(),
                split.getLocations());
      } else {
        oldSplit = ((InputSplitWrapper) split).realSplit;
      }

      @SuppressWarnings("unchecked")
      Reporter reporter = new Reporter() { // Reporter interface over ctx

            final TaskInputOutputContext ioCtx =
                context instanceof TaskInputOutputContext ? (TaskInputOutputContext) context : null;

            public void progress() {
              HadoopCompat.progress(context);
            }

            // @Override
            public float getProgress() {
              return (ioCtx != null) ? ioCtx.getProgress() : 0;
            }

            public void setStatus(String status) {
              if (ioCtx != null) HadoopCompat.setStatus(ioCtx, status);
            }

            public void incrCounter(String group, String counter, long amount) {
              if (ioCtx != null)
                HadoopCompat.incrementCounter(ioCtx.getCounter(group, counter), amount);
            }

            @SuppressWarnings("unchecked")
            public void incrCounter(Enum<?> key, long amount) {
              if (ioCtx != null) HadoopCompat.incrementCounter(ioCtx.getCounter(key), amount);
            }

            public org.apache.hadoop.mapred.InputSplit getInputSplit()
                throws UnsupportedOperationException {
              throw new UnsupportedOperationException();
            }

            public Counter getCounter(String group, String name) {
              return ioCtx != null ? (Counter) HadoopCompat.getCounter(ioCtx, group, name) : null;
            }

            @SuppressWarnings("unchecked")
            public Counter getCounter(Enum<?> name) {
              return ioCtx != null ? (Counter) ioCtx.getCounter(name) : null;
            }
          };

      realReader =
          realInputFormat.getRecordReader(
              oldSplit, (JobConf) HadoopCompat.getConfiguration(context), reporter);

      keyObj = realReader.createKey();
      valueObj = realReader.createValue();
    }

예제 #18

0

파일 보기

파일: MapReduceInputFormatWrapper.java 프로젝트: HimanshuBhardwaj/elephant-bird

 @Override
 public float getProgress() throws IOException {
   return realReader.getProgress();
 }

예제 #19

0

파일 보기

파일: MapReduceInputFormatWrapper.java 프로젝트: HimanshuBhardwaj/elephant-bird

 @Override
 public void close() throws IOException {
   realReader.close();
 }

예제 #20

0

파일 보기

파일: GenericHiveRecordCursor.java 프로젝트: ZheYuan/presto

  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      DateTimeZone sessionTimeZone) {
    checkNotNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    checkNotNull(splitSchema, "splitSchema is null");
    checkNotNull(partitionKeys, "partitionKeys is null");
    checkNotNull(columns, "columns is null");
    checkArgument(!columns.isEmpty(), "columns is empty");
    checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    checkNotNull(sessionTimeZone, "sessionTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;
    this.sessionTimeZone = sessionTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();

    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];

    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];

    this.isPartitionColumn = new boolean[size];

    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
      HiveColumnHandle column = columns.get(i);

      names[i] = column.getName();
      types[i] = column.getType();
      hiveTypes[i] = column.getHiveType();

      if (!column.isPartitionKey()) {
        StructField field = rowInspector.getStructFieldRef(column.getName());
        structFields[i] = field;
        fieldInspectors[i] = field.getFieldObjectInspector();
      }

      isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName =
        uniqueIndex(partitionKeys, HivePartitionKey.nameGetter());
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      if (column.isPartitionKey()) {
        HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
        checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

        byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8);

        Type type = types[columnIndex];
        if (BOOLEAN.equals(type)) {
          if (isTrue(bytes, 0, bytes.length)) {
            booleans[columnIndex] = true;
          } else if (isFalse(bytes, 0, bytes.length)) {
            booleans[columnIndex] = false;
          } else {
            String valueString = new String(bytes, Charsets.UTF_8);
            throw new IllegalArgumentException(
                String.format(
                    "Invalid partition value '%s' for BOOLEAN partition key %s",
                    valueString, names[columnIndex]));
          }
        } else if (BIGINT.equals(type)) {
          if (bytes.length == 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid partition value '' for BIGINT partition key %s", names[columnIndex]));
          }
          longs[columnIndex] = parseLong(bytes, 0, bytes.length);
        } else if (DOUBLE.equals(type)) {
          if (bytes.length == 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid partition value '' for DOUBLE partition key %s", names[columnIndex]));
          }
          doubles[columnIndex] = parseDouble(bytes, 0, bytes.length);
        } else if (VARCHAR.equals(type)) {
          slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length));
        } else {
          throw new UnsupportedOperationException("Unsupported column type: " + type);
        }
      }
    }
  }

예제 #21

0

파일 보기

파일: GenericHiveRecordCursor.java 프로젝트: cdma/presto

  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      TypeManager typeManager) {
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();

    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];

    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];

    this.isPartitionColumn = new boolean[size];

    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
      HiveColumnHandle column = columns.get(i);

      names[i] = column.getName();
      types[i] = typeManager.getType(column.getTypeSignature());
      hiveTypes[i] = column.getHiveType();

      if (!column.isPartitionKey()) {
        StructField field = rowInspector.getStructFieldRef(column.getName());
        structFields[i] = field;
        fieldInspectors[i] = field.getFieldObjectInspector();
      }

      isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      if (column.isPartitionKey()) {
        HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
        checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

        byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

        String name = names[columnIndex];
        Type type = types[columnIndex];
        if (HiveUtil.isHiveNull(bytes)) {
          nulls[columnIndex] = true;
        } else if (BOOLEAN.equals(type)) {
          booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
        } else if (BIGINT.equals(type)) {
          longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
        } else if (INTEGER.equals(type)) {
          longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name);
        } else if (SMALLINT.equals(type)) {
          longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name);
        } else if (TINYINT.equals(type)) {
          longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name);
        } else if (DOUBLE.equals(type)) {
          doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
        } else if (isVarcharType(type)) {
          slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type);
        } else if (DATE.equals(type)) {
          longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name);
        } else if (TIMESTAMP.equals(type)) {
          longs[columnIndex] =
              timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name);
        } else if (isShortDecimal(type)) {
          longs[columnIndex] =
              shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
        } else if (isLongDecimal(type)) {
          slices[columnIndex] =
              longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
        } else {
          throw new PrestoException(
              NOT_SUPPORTED,
              format(
                  "Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
        }
      }
    }
  }

예제 #22

0

파일 보기

파일: HiveRecordReader.java 프로젝트: EatCodeCat/tdw

 public boolean next(K key, V value) throws IOException {
   if (ExecMapper.getDone()) return false;
   return recordReader.next(key, value);
 }

예제 #23

0

파일 보기

파일: HiveRecordReader.java 프로젝트: EatCodeCat/tdw

 public float getProgress() throws IOException {
   return recordReader.getProgress();
 }

예제 #24

0

파일 보기

파일: HiveRecordReader.java 프로젝트: EatCodeCat/tdw

 public long getPos() throws IOException {
   return recordReader.getPos();
 }

예제 #25

0

파일 보기

파일: HiveRecordReader.java 프로젝트: EatCodeCat/tdw

 public V createValue() {
   return (V) recordReader.createValue();
 }

예제 #26

0

파일 보기

파일: HiveRecordReader.java 프로젝트: EatCodeCat/tdw

 public K createKey() {
   return (K) recordReader.createKey();
 }

예제 #27

0

파일 보기

파일: WrappedRecordReader.java 프로젝트: ukulililixl/core

 /** Forward close request to proxied RR. */
 public void close() throws IOException {
   rr.close();
 }

예제 #28

0

파일 보기

파일: MapReduceInputFormatWrapper.java 프로젝트: HimanshuBhardwaj/elephant-bird

 @Override
 public boolean nextKeyValue() throws IOException, InterruptedException {
   return realReader.next(keyObj, valueObj);
 }

예제 #29

0

파일 보기

파일: HiveRecordReader.java 프로젝트: EatCodeCat/tdw

 public void close() throws IOException {
   recordReader.close();
 }

예제 #30

0

파일 보기

파일: WrappedRecordReader.java 프로젝트: ukulililixl/core

 /**
  * Read the next k,v pair into the head of this object; return true iff the RR and this are
  * exhausted.
  */
 protected boolean next() throws IOException {
   empty = !rr.next(khead, vhead);
   return hasNext();
 }