Java Text.getLength示例，org.apache.hadoop.io.Text.getLength Java示例

示例#1

0

显示文件

文件： PatternRecordReader.java 项目： StephanieMak/hadoop-mapreduce

  /**
   * Reads lines from {@code in} and accumulates them into {@code text} until a line fully matches
   * {@code delimiterPattern} or the reader returns zero bytes.
   *
   * <p>Each accumulated line is preceded by the {@code EOL} bytes. The line that matches the
   * delimiter is consumed (it counts towards the returned total) but is not appended.
   *
   * @param text destination record; cleared before anything is read
   * @param maxLineLength forwarded to {@code in.readLine}
   * @param maxBytesToConsume forwarded to {@code in.readLine}; also caps the number of lines read
   * @return the sum of the values returned by {@code in.readLine}
   * @throws IOException if the underlying reader fails
   */
  private int readNext(Text text, int maxLineLength, int maxBytesToConsume) throws IOException {
    text.clear();
    final Text line = new Text();
    int consumed = 0;
    int linesAppended = 0;

    while (linesAppended < maxBytesToConsume) {
      final int lineBytes = in.readLine(line, maxLineLength, maxBytesToConsume);
      consumed += lineBytes;

      // A zero-byte read signals end of file; a full-line match marks the end of the record.
      if (lineBytes == 0 || delimiterPattern.matcher(line.toString()).matches()) {
        break;
      }

      // Append value to record, prefixed by the end-of-line marker.
      text.append(EOL.getBytes(), 0, EOL.getLength());
      text.append(line.getBytes(), 0, line.getLength());
      linesAppended++;
    }

    return consumed;
  }

示例#2

0

显示文件

文件： DataOutputStreamUtils.java 项目： rygim/vertexium

 /**
  * Writes {@code text} to {@code out} as an int length followed by that many content bytes.
  *
  * <p>A {@code null} text is written as the single int {@code -1} with no payload.
  *
  * @param out stream to write to
  * @param text value to encode, may be {@code null}
  * @throws IOException if writing to {@code out} fails
  */
 public static void encodeText(DataOutputStream out, Text text) throws IOException {
   if (text != null) {
     final int length = text.getLength();
     out.writeInt(length);
     // Only the first getLength() bytes of the array are written.
     out.write(text.getBytes(), 0, length);
   } else {
     out.writeInt(-1);
   }
 }

示例#3

0

显示文件

文件： QseqInputFormat.java 项目： ridvandongelci/Hadoop-BAM

    /**
     * Parses one qseq line into {@code key} and {@code fragment}.
     *
     * <p>Field boundaries are first computed by {@code setFieldPositionsAndLengths(line)}; the
     * rest of the method indexes into {@code fieldPositions} / {@code fieldLengths}.
     *
     * <p>Note that {@code NumberFormatException} raised by the {@code Integer.parseInt} calls is
     * not caught here and propagates to the caller.
     *
     * @param line the raw line to parse
     * @param key cleared and rebuilt from fields 0 to 5 plus field 7, joined with ':'
     * @param fragment cleared and populated from the parsed fields
     * @throws FormatException if decoding a field raises {@code CharacterCodingException}
     */
    private void scanQseqLine(Text line, Text key, SequencedFragment fragment) {
      setFieldPositionsAndLengths(line);

      // Build the key.  We concatenate all fields from 0 to 5 (machine to y-pos)
      // and then the read number, replacing the tabs with colons.
      key.clear();
      // append up and including field[5]
      key.append(line.getBytes(), 0, fieldPositions[5] + fieldLengths[5]);
      // replace tabs with :
      byte[] bytes = key.getBytes();
      // Remember where the first part ends: the byte appended next lands at this index.
      int temporaryEnd = key.getLength();
      for (int i = 0; i < temporaryEnd; ++i) if (bytes[i] == '\t') bytes[i] = ':';
      // append the read number
      key.append(
          line.getBytes(),
          fieldPositions[7] - 1,
          fieldLengths[7] + 1); // +/- 1 to catch the preceding tab.
      // convert the tab preceding the read number into a :
      // (getBytes() is called again rather than reusing 'bytes' from before the append)
      key.getBytes()[temporaryEnd] = ':';

      // now the fragment
      try {
        fragment.clear();
        fragment.setInstrument(Text.decode(line.getBytes(), fieldPositions[0], fieldLengths[0]));
        fragment.setRunNumber(
            Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[1], fieldLengths[1])));
        // fragment.setFlowcellId();
        fragment.setLane(
            Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[2], fieldLengths[2])));
        fragment.setTile(
            Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[3], fieldLengths[3])));
        fragment.setXpos(
            Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[4], fieldLengths[4])));
        fragment.setYpos(
            Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[5], fieldLengths[5])));
        fragment.setRead(
            Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[7], fieldLengths[7])));
        // The filter flag passes for any first byte of field 10 other than '0'.
        fragment.setFilterPassed(line.getBytes()[fieldPositions[10]] != '0');
        // fragment.setControlNumber();
        // A non-empty field 6 starting with '0' yields a null index sequence; otherwise the
        // field is decoded and every '.' is replaced with 'N'.
        if (fieldLengths[6] > 0
            && line.getBytes()[fieldPositions[6]] == '0') // 0 is a null index sequence
        fragment.setIndexSequence(null);
        else
          fragment.setIndexSequence(
              Text.decode(line.getBytes(), fieldPositions[6], fieldLengths[6]).replace('.', 'N'));
      } catch (CharacterCodingException e) {
        throw new FormatException(
            "Invalid character format at "
                + makePositionMessage(this.pos - line.getLength())
                + "; line: "
                + line);
      }

      // Sequence (field 8) and quality (field 9) are appended as raw bytes, without decoding.
      fragment.getSequence().append(line.getBytes(), fieldPositions[8], fieldLengths[8]);
      fragment.getQuality().append(line.getBytes(), fieldPositions[9], fieldLengths[9]);
    }

示例#4

0

显示文件

文件： TestTextInputFormat.java 项目： Ronald33/hadoop-0.21

 /**
  * Tests readLine against various kinds of line termination sequences. The buffer size is varied
  * from 1 up to the input length to stress the reader, and the sum of the returned byte counts
  * is checked against the input length.
  *
  * @throws Exception if reading fails
  */
 @Test
 public void testNewLines() throws Exception {
   final String STR = "a\nbb\n\nccc\rdddd\r\r\r\n\r\neeeee";
   final int STRLENBYTES = STR.getBytes().length;
   // Expected content length of each successive line of STR:
   // "a", "bb", "", "ccc", "dddd", "", "", "", "eeeee".
   final int[] expectedLengths = {1, 2, 0, 3, 4, 0, 0, 0, 5};
   Text out = new Text();
   for (int bufsz = 1; bufsz <= STRLENBYTES; ++bufsz) {
     LineReader in = makeStream(STR, bufsz);
     int consumed = 0;
     for (int line = 0; line < expectedLengths.length; line++) {
       consumed += in.readLine(out);
       assertEquals(
           "line" + (line + 1) + " length, bufsz:" + bufsz, expectedLengths[line], out.getLength());
     }
     assertEquals("end of file, bufsz: " + bufsz, 0, in.readLine(out));
     assertEquals("total bytes, bufsz: " + bufsz, consumed, STRLENBYTES);
   }
 }

示例#5

0

显示文件

文件： DynamicFileListRecordReaderTest.java 项目： GoogleCloudPlatform/bigdata-interop

 /**
  * Creates file {@code outfile} and writes each element of {@code lines} followed by a newline.
  *
  * <p>The stream is closed even if a write fails, so a failing test does not leak it.
  *
  * @param outfile path of the file to create via {@code fileSystem}
  * @param lines the lines to write, in order
  * @throws IOException if the file cannot be created, written or closed
  */
 private void writeFile(Path outfile, List<String> lines) throws IOException {
   FSDataOutputStream dataOut = fileSystem.create(outfile);
   try {
     Text newline = new Text("\n");
     Text textLine = new Text();
     for (String line : lines) {
       textLine.set(line);
       // Only the first getLength() bytes of a Text's array are valid content.
       dataOut.write(textLine.getBytes(), 0, textLine.getLength());
       dataOut.write(newline.getBytes(), 0, newline.getLength());
     }
   } finally {
     dataOut.close();
   }
 }

示例#6

0

显示文件

文件： TeraInputFormat.java 项目： ifloating/hadoop-source-reading

 /**
  * Reads the next line from {@code in} and splits it into key and value.
  *
  * <p>The first {@code KEY_LENGTH} bytes become the key and the remainder the value. A line
  * shorter than {@code KEY_LENGTH} becomes the key as a whole and the value is cleared.
  *
  * @param key receives the key bytes
  * @param value receives the remaining bytes, or is cleared
  * @return {@code true} if a line was read, {@code false} if {@code in} had no more records
  * @throws IOException if the underlying reader fails
  */
 public boolean next(Text key, Text value) throws IOException {
   if (!in.next(junk, line)) {
     return false;
   }

   final int lineLength = line.getLength();
   if (lineLength >= KEY_LENGTH) {
     final byte[] raw = line.getBytes();
     key.set(raw, 0, KEY_LENGTH);
     value.set(raw, KEY_LENGTH, lineLength - KEY_LENGTH);
   } else {
     key.set(line);
     value.clear();
   }
   return true;
 }

示例#7

0

显示文件

文件： CuboidReducer.java 项目： chujiaqiang/kylin

  /**
   * Aggregates all values of one key and writes a single encoded result.
   *
   * <p>Each value is decoded into {@code input} and fed to {@code aggs}; when {@code cuboidLevel}
   * is positive the {@code needAggr} argument is passed along as well. The collected state is
   * encoded and written under the unchanged key.
   */
  @Override
  public void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    aggs.reset();

    for (Text value : values) {
      // Wrap only the valid prefix of the Text's array.
      ByteBuffer encoded = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
      codec.decode(encoded, input);
      if (cuboidLevel <= 0) {
        aggs.aggregate(input);
      } else {
        aggs.aggregate(input, needAggr);
      }
    }
    aggs.collectStates(result);

    // The encoded bytes run from the start of the buffer's array up to its position.
    ByteBuffer valueBuf = codec.encode(result);
    outputValue.set(valueBuf.array(), 0, valueBuf.position());
    context.write(key, outputValue);

    if (++counter % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
      logger.info("Handled " + counter + " records!");
    }
  }

示例#8

0

显示文件

文件： NotInFinder.java 项目： KGayan/Acacia

    /**
     * Builds one value out of up to {@code MAX_LINE_COUNT} non-empty lines.
     *
     * <p>For each non-empty line only the part before the first separator is kept (for a line such
     * as '4847570 -1' that is '4847570'); the kept parts are appended to {@code value}, each
     * followed by a single space. Empty lines are skipped but still count as having read
     * something. {@code key} is not modified.
     *
     * @param key unused
     * @param value cleared, then filled with the collected first fields
     * @return {@code true} if at least one line was read from {@code lineRecord}
     * @throws IOException if the underlying reader fails
     */
    public synchronized boolean next(LongWritable key, Text value) throws IOException {
      final byte[] space = {' '};
      boolean gotsomething = false;
      int counter = 0;
      value.clear();

      do {
        if (!lineRecord.next(lineKey, lineValue)) {
          break;
        }
        String ln = lineValue.toString();
        if (ln.length() > 0) {
          // A limit of 2 guarantees at least one array element, so a line consisting only of
          // separators yields an empty first field instead of an ArrayIndexOutOfBoundsException.
          lineValue.set(ln.split("	", 2)[0]);
          value.append(lineValue.getBytes(), 0, lineValue.getLength());
          value.append(space, 0, 1);
          counter++;
        }
        gotsomething = true;
      } while (counter < MAX_LINE_COUNT);

      return gotsomething;
    }

示例#9

0

显示文件

文件： LineRecordReader.java 项目： JoeChien23/hadoop

 /**
  * Reads the first line into {@code value} and strips a leading UTF-8 byte order mark
  * (0xEF, 0xBB, 0xBF) if one is present.
  *
  * <p>The line is read with a limit three bytes larger than {@code maxLineLength} so that a BOM
  * does not count against the configured maximum. {@code pos} is advanced by the full number of
  * bytes read, BOM included.
  *
  * @return the number of bytes read, minus three if a BOM was stripped
  * @throws IOException if the underlying reader fails
  */
 private int skipUtfByteOrderMark() throws IOException {
   // Text only supports UTF-8, so only the UTF-8 BOM needs to be checked.
   final int widenedMaxLineLength =
       (int) Math.min(3L + (long) maxLineLength, Integer.MAX_VALUE);
   final int bytesRead = in.readLine(value, widenedMaxLineLength, maxBytesToConsume(pos));
   pos += bytesRead;

   final int textLength = value.getLength();
   final byte[] textBytes = value.getBytes();
   final boolean startsWithBom =
       textLength >= 3
           && textBytes[0] == (byte) 0xEF
           && textBytes[1] == (byte) 0xBB
           && textBytes[2] == (byte) 0xBF;
   if (!startsWithBom) {
     return bytesRead;
   }

   LOG.info("Found UTF-8 BOM and skipped it");
   final int remaining = textLength - 3;
   if (remaining > 0) {
     // Copy first: set() is given a separate array rather than the Text's own backing array.
     value.set(value.copyBytes(), 3, remaining);
   } else {
     value.clear();
   }
   return bytesRead - 3;
 }

示例#10

0

显示文件

文件： CSRConverter.java 项目： KGayan/Acacia

    /**
     * Concatenates up to {@code MAX_LINE_COUNT} non-empty lines into {@code value}, each followed
     * by a single space. Empty lines are skipped but still count as having read something.
     * {@code key} is not modified.
     *
     * @param key unused
     * @param value cleared, then filled with the collected lines
     * @return {@code true} if at least one line was read from {@code lineRecord}
     * @throws IOException if the underlying reader fails
     */
    public synchronized boolean next(LongWritable key, Text value) throws IOException {
      final byte[] separator = {' '};
      boolean sawRecord = false;
      int appended = 0;
      value.clear();

      do {
        if (!lineRecord.next(lineKey, lineValue)) {
          break;
        }
        if (lineValue.toString().length() > 0) {
          value.append(lineValue.getBytes(), 0, lineValue.getLength());
          value.append(separator, 0, 1);
          appended++;
        }
        sawRecord = true;
      } while (appended < MAX_LINE_COUNT);

      return sawRecord;
    }

示例#11

0

显示文件

文件： MutilCharRecordReader.java 项目： sven0726/hive

  /**
   * Reads the next line, replacing every occurrence of "$#$" in it with the character \001.
   *
   * <p>Lines whose reported size is not below {@code maxLineLength} are logged and skipped.
   *
   * @param key set to the position at which the line starts
   * @param value receives the line after replacement
   * @return {@code true} if a line was produced, {@code false} at end of input
   * @throws IOException if the underlying reader fails
   */
  @Override
  public boolean next(LongWritable key, Text value) throws IOException {

    while (pos < end) {
      key.set(pos);

      final int remaining = (int) Math.min(Integer.MAX_VALUE, end - pos);
      final int newSize =
          lineReader.readLine(value, maxLineLength, Math.max(remaining, maxLineLength));

      // Swap the multi-character delimiter for a single control character.
      final Text replaced = new Text(value.toString().replace("$#$", "\001"));
      value.set(replaced.getBytes(), 0, replaced.getLength());

      if (newSize == 0) {
        return false;
      }
      pos += newSize;
      if (newSize < maxLineLength) {
        return true;
      }
      // line too long. try again
      log.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    return false;
  }

示例#12

0

显示文件

文件： PagerankData.java 项目： ConeyLiu/HiBench

    /**
     * Emits generated link ids for every page in the range belonging to the slot named by
     * {@code value}.
     *
     * <p>For each page the key is set to the page id and one record per generated link id is
     * collected. The {@code BYTES_DATA_GENERATED} counter grows by 8 plus the value's byte length
     * for every record.
     */
    public void map(
        LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter)
        throws IOException {

      final int slotId = Integer.parseInt(value.toString().trim());
      html.fireRandom(slotId);

      final long[] range = HtmlCore.getPageRange(slotId, pages, slotpages);

      // For output collect
      for (long page = range[0]; page < range[1]; page++) {
        key.set(page);

        for (long linkId : html.genPureLinkIds()) {
          Text link = new Text(Long.toString(linkId));
          output.collect(key, link);
          reporter.incrCounter(HiBench.Counters.BYTES_DATA_GENERATED, 8 + link.getLength());
        }

        if (page % 10000 == 0) {
          log.info("still running: " + (page - range[0]) + " of " + slotpages);
        }
      }
    }

示例#13

0

显示文件

文件： TestLzoTextInputFormat.java 项目： huagetai/elephant-bird

  /**
   * Generates random data, compresses it, optionally indexes it, and md5-hashes it. The data is
   * then read back through {@code LzoTextInputFormat} and hashed again; both digests must match.
   *
   * @param testWithIndex whether an lzo index is created before reading
   * @param charsToOutput how many characters of random data to write
   * @throws IOException
   * @throws NoSuchAlgorithmException
   * @throws InterruptedException
   */
  private void runTest(boolean testWithIndex, int charsToOutput)
      throws IOException, NoSuchAlgorithmException, InterruptedException {

    final Configuration conf = new Configuration();
    // reducing block size to force a split of the tiny file
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

    // remove cached filesystem (if any)
    FileSystem.getLocal(conf).close();
    final FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir_, true);
    localFs.mkdirs(outputDir_);

    final Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir_);

    final TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    // create some input data
    final byte[] expectedMd5 = createTestInput(outputDir_, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
      LzoIndex.createIndex(localFs, new Path(outputDir_, lzoFileName_));
    }

    final LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir_);

    // verify we have the right number of lzo chunks
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final int expectedSplits = (testWithIndex && OUTPUT_BIG == charsToOutput) ? 3 : 1;
    assertEquals(expectedSplits, splits.size());

    // let's read it all and calculate the md5 hash
    for (InputSplit split : splits) {
      RecordReader<LongWritable, Text> reader =
          inputFormat.createRecordReader(split, attemptContext);
      reader.initialize(split, attemptContext);

      while (reader.nextKeyValue()) {
        Text line = reader.getCurrentValue();
        md5_.update(line.getBytes(), 0, line.getLength());
      }

      reader.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5_.digest()));
  }

示例#14

0

显示文件

文件： NumericIndexStrategyFilterIterator.java 项目： ngageoint/geowave

 /**
  * Checks whether the row of {@code k} falls inside the cached ranges.
  *
  * <p>The row is loaded into the reusable {@code row} buffer, its insertion id is extracted and
  * converted to per-dimension coordinates, and those are tested against {@code rangeCache}.
  *
  * @param k the key whose row is examined
  * @return the result of {@code rangeCache.inBounds} for the row's coordinates
  */
 private boolean inBounds(final Key k) {
   k.getRow(row);
   final GeowaveRowId rowId = new GeowaveRowId(row.getBytes(), row.getLength());
   final ByteArrayId insertionId = new ByteArrayId(rowId.getInsertionId());
   final MultiDimensionalCoordinates coordinates =
       indexStrategy.getCoordinatesPerDimension(insertionId);
   return rangeCache.inBounds(coordinates);
 }

示例#15

0

显示文件

文件： Spinn3rMaper.java 项目： snap-stanford/news-search

  /**
   * Called for every record in the data. Parses the record into a document, applies the search
   * filter and writes the document out if it satisfies the filter. Parsing and filtering times
   * are accumulated in counters.
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Skip enormous documents, due to memory problems and since regex cannot handle them.
    if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {
      context.getCounter(ProcessingTime.SKIPPED).increment(1);
      return;
    }

    // Parse document and measure time
    t1 = System.nanoTime();
    Spinn3rDocument d = new Spinn3rDocument(value.toString());
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

    // Return only those documents that satisfy search conditions
    t1 = System.nanoTime();
    t = filter.documentSatisfies(d);
    t2 = System.nanoTime();
    context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

    if (!t) {
      return;
    }

    // Output in F5 format when requested on the command line, default format otherwise.
    String rendered = cmdMap.hasOption("formatF5") ? d.toStringF5() : d.toString();
    context.write(new Text(rendered), NullWritable.get());
  }

示例#16

0

显示文件

文件： DeliverFormatForUVMR.java 项目： wisgood/mobile-core

    /**
     * Sums the counts per dimension for one key and writes them as a tab-separated list of
     * {@code dimension,total} pairs.
     *
     * <p>Each input value is expected to look like {@code count:dimension}. The output order of
     * the pairs follows the iteration order of the backing {@code HashMap}.
     *
     * @param key the key, written through unchanged
     * @param values values of the form {@code count:dimension}
     * @param context receives the single aggregated record
     * @throws IOException if writing or UTF-8 encoding fails
     * @throws InterruptedException if the write is interrupted
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      HashMap<String, Integer> aggResult = new HashMap<String, Integer>();

      newValue.clear();
      for (Text val : values) {
        String[] fields = val.toString().split(":");
        int num = Integer.parseInt(fields[0]);
        String dim = fields[1];

        Integer previous = aggResult.get(dim);
        aggResult.put(dim, previous == null ? num : previous.intValue() + num);
      }

      // Text stores UTF-8, so encode explicitly as UTF-8 and use the encoded BYTE length.
      // Using String.length() (a char count) would truncate values with multi-byte characters.
      final byte[] tab = "\t".getBytes("UTF-8");
      for (java.util.Map.Entry<String, Integer> entry : aggResult.entrySet()) {
        byte[] singleValue = (entry.getKey() + "," + entry.getValue()).getBytes("UTF-8");
        if (newValue.getLength() > 0) {
          newValue.append(tab, 0, tab.length);
        }
        newValue.append(singleValue, 0, singleValue.length);
      }

      context.write(key, newValue);
    }

示例#17

0

显示文件

文件： StringUnaryUDF.java 项目： sushrutikhar/hive

 /* Store the contents of Text object t in entry i of the output vector.
  * A null t is recorded as a SQL NULL: the entry is flagged in isNull and
  * the vector's noNulls flag is cleared.
  */
 private static void setString(BytesColumnVector outV, int i, Text t) {
   if (t != null) {
     // Only the first getLength() bytes of the array are passed on.
     outV.setVal(i, t.getBytes(), 0, t.getLength());
   } else {
     outV.noNulls = false;
     outV.isNull[i] = true;
   }
 }

示例#18

0

显示文件

文件： RawKeyTextOutputFormat.java 项目： GunioRobot/scribe-dependencies

 /**
  * Writes an object to the byte stream. A {@code Text} is written from its raw bytes directly;
  * anything else is written as its {@code toString()} encoded with the {@code utf8} charset.
  *
  * @param o the object to print
  * @throws IOException if the write throws, we pass it on
  */
 private void writeObject(Object o) throws IOException {
   if (!(o instanceof Text)) {
     out.write(o.toString().getBytes(utf8));
     return;
   }
   Text text = (Text) o;
   out.write(text.getBytes(), 0, text.getLength());
 }

示例#19

0

显示文件

文件： CountBytes.java 项目： wistoft/BDM

    /**
     * Emits two records per input line: ("bytes", byte length of the line) and ("lines", 1).
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      final IntWritable byteCount = new IntWritable(value.getLength());
      final IntWritable one = new IntWritable(1);

      context.write(new Text("bytes"), byteCount);
      context.write(new Text("lines"), one);
    }

示例#20

0

显示文件

文件： BulkIngestExample.java 项目： joshelser/accumulo

    /**
     * Splits the line at its first tab into key and value and writes the pair.
     *
     * <p>Lines without a tab, and lines whose first byte is a tab (empty key), produce no output.
     */
    @Override
    public void map(LongWritable key, Text value, Context output)
        throws IOException, InterruptedException {
      final byte[] raw = value.getBytes();
      final int length = value.getLength();

      // Advance to the first tab; tab == length afterwards means there is none.
      int tab = 0;
      while (tab < length && raw[tab] != '\t') {
        tab++;
      }

      if (tab > 0 && tab < length) {
        outputKey.set(raw, 0, tab);
        outputValue.set(raw, tab + 1, length - (tab + 1));
        output.write(outputKey, outputValue);
      }
    }

示例#21

0

显示文件

文件： TsvEmitter.java 项目： tottokomakotaro/asakusafw

  /**
   * Emits a string cell. After starting the cell, nothing more is written when
   * {@code emitNull(option)} returns true or when the text is empty. Otherwise the text's bytes
   * are decoded through {@code decoder} into {@code decodeBuffer}, which is drained with
   * {@code consumeDecoded()} whenever the decoder reports underflow or overflow.
   *
   * @throws RecordFormatException if the decoder reports an error while decoding or flushing
   */
  @Override
  public void emit(StringOption option) throws IOException {
    startCell();
    if (emitNull(option)) {
      return;
    }
    Text text = option.get();
    final int length = text.getLength();
    if (length == 0) {
      return;
    }

    ByteBuffer source = ByteBuffer.wrap(text.getBytes(), 0, length);
    decoder.reset();
    decodeBuffer.clear();

    // Decode phase: a non-error result is either overflow (drain and continue) or underflow
    // (drain and stop).
    for (;;) {
      CoderResult decoded = decoder.decode(source, decodeBuffer, true);
      if (decoded.isError()) {
        throw new RecordFormatException(
            MessageFormat.format("Cannot process a character string (\"{0}\")", decoded));
      }
      consumeDecoded();
      if (decoded.isUnderflow()) {
        break;
      }
    }

    // Flush phase: same protocol, driven by decoder.flush().
    for (;;) {
      CoderResult flushed = decoder.flush(decodeBuffer);
      if (flushed.isError()) {
        throw new RecordFormatException(
            MessageFormat.format("Cannot process a character string (\"{0}\")", flushed));
      }
      consumeDecoded();
      if (flushed.isUnderflow()) {
        break;
      }
    }
  }

示例#22

0

显示文件

文件： DocumentInputFormat.java 项目： mizinovmv/powerMining

 /**
  * Parses {@code line} as JSON and stores the resulting document in {@code doc}.
  *
  * @param line the JSON text of one document
  * @return {@code true} if a document was parsed; {@code false} if the line is empty or parsing
  *     failed (the failure is logged with its stack trace)
  */
 public boolean jsonToDocument(Text line) {
   if (line.getLength() == 0) {
     return false;
   }
   try {
     doc = DocumentFabric.fromJson(line.toString());
   } catch (Exception e) {
     // Pass the exception itself: getMessage() alone may be null and drops the stack trace.
     LOG.error("Cannot parse document from JSON: " + e.getMessage(), e);
     return false;
   }
   return true;
 }

示例#23

0

显示文件

文件： TestLzoTextInputFormat.java 项目： huagetai/elephant-bird

  /**
   * Creates an lzo file with random data.
   *
   * @param outputDir Output directory.
   * @param fs File system we're using.
   * @param attemptContext Task attempt context, contains task id etc.
   * @param charsToOutput Writing stops once the running total returned by {@code fillText}
   *     reaches this value.
   * @return md5 digest over, for every record, the key bytes, a tab and the value bytes
   * @throws IOException
   * @throws InterruptedException
   */
  private byte[] createTestInput(
      Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput)
      throws IOException, InterruptedException {

    final TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> writer = null;

    md5_.reset();

    try {
      writer = output.getRecordWriter(attemptContext);

      final char[] alphabet = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();
      final int charsMax = alphabet.length - 1;
      final Random random = new Random(System.currentTimeMillis());
      final Text key = new Text();
      final Text value = new Text();

      int written = 0;
      while (written < charsToOutput) {
        written += fillText(alphabet, random, charsMax, key);
        written += fillText(alphabet, random, charsMax, value);
        writer.write(key, value);

        md5_.update(key.getBytes(), 0, key.getLength());
        // text output format writes tab between the key and value
        md5_.update("\t".getBytes("UTF-8"));
        md5_.update(value.getBytes(), 0, value.getLength());
      }
    } finally {
      if (writer != null) {
        writer.close(attemptContext);
        OutputCommitter committer = output.getOutputCommitter(attemptContext);
        committer.commitTask(attemptContext);
        committer.cleanupJob(attemptContext);
      }
    }

    final byte[] digest = md5_.digest();
    md5_.reset();
    return digest;
  }

示例#24

0

显示文件

文件： UDFSha1.java 项目： honglvlan/BDST

  /**
   * Computes the digest of the text's bytes with the shared {@code digest} instance and returns
   * it as a hex string (SHA-1, going by the original description of this UDF).
   *
   * @param n the input text, may be {@code null}
   * @return the reusable {@code result} holding the hex digest, or {@code null} for null input
   */
  public Text evaluate(Text n) {
    if (n == null) {
      return null;
    }

    digest.reset();
    // Hash only the valid prefix of the Text's array.
    digest.update(n.getBytes(), 0, n.getLength());
    result.set(Hex.encodeHexString(digest.digest()));
    return result;
  }

示例#25

0

显示文件

文件： UDFAscii.java 项目： Leolh/hive

  /**
   * Returns the value of the first byte of {@code s}, or 0 for an empty text.
   *
   * <p>NOTE(review): the byte is passed on as a signed Java {@code byte}, so a first byte of 0x80
   * or above yields a negative result. Confirm whether callers rely on this.
   *
   * @param s the input text, may be {@code null}
   * @return the reusable {@code result}, or {@code null} for null input
   */
  public IntWritable evaluate(Text s) {
    if (s == null) {
      return null;
    }

    final boolean empty = s.getLength() <= 0;
    if (empty) {
      result.set(0);
    } else {
      result.set(s.getBytes()[0]);
    }
    return result;
  }

示例#26

0

显示文件

文件： CIngestMapper.java 项目： is/demos

  /**
   * Parses one JSON record and queues it as a row mutation.
   *
   * <p>The raw text is stored in column "raw". When {@code storeAttirbute} is set, every
   * non-null, non-empty attribute whose name does not start with "__" is stored under its
   * lower-cased name. The mutation batch is created on demand and executed once it holds more
   * than 300 rows; smaller batches are left pending in {@code mb}.
   *
   * @param key unused
   * @param value one JSON document; empty values are ignored
   * @param collector unused
   * @param reporter unused
   * @throws IOException if the JSON cannot be read
   */
  @Override
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<NullWritable, NullWritable> collector,
      Reporter reporter)
      throws IOException {
    if (value.getLength() == 0) return;

    // Text.getBytes() exposes the backing array, which can be longer than the valid content
    // and hold stale bytes from an earlier, longer record. Parse only the valid prefix.
    byte[] raw = java.util.Arrays.copyOf(value.getBytes(), value.getLength());

    Map<String, Object> msg = mapper.readValue(raw, Map.class);
    String rowId = createRowId(msg);

    if (rowId == null) {
      // TODO ... Error Handler
      return;
    }

    if (mb == null) {
      mb = ks.prepareMutationBatch();
    }
    ColumnListMutation<String> c = mb.withRow(cf, rowId);
    c.putColumn("raw", value.toString(), null);

    if (storeAttirbute) {
      for (String k : msg.keySet()) {
        if (k.startsWith("__")) continue;

        Object v = msg.get(k);

        if (v == null) continue;

        if (v.equals("")) continue;

        c.putColumn(k.toLowerCase(), v.toString(), null);
      }
    }

    try {
      if (mb.getRowCount() > 300) {
        mb.execute();
        mb = null;
      }
    } catch (ConnectionException e) {
      // Best effort: the failed batch is dropped and a fresh one is started on the next record.
      e.printStackTrace();
      mb = null;
    }
  }

示例#27

0

显示文件

文件： PagerankData.java 项目： ConeyLiu/HiBench

    /**
     * Emits one (page id, page id as text) record for every page in the range belonging to the
     * slot named by {@code value}. The {@code BYTES_DATA_GENERATED} counter grows by 8 plus the
     * value's byte length for every record.
     */
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter)
        throws IOException {

      final int slotId = Integer.parseInt(value.toString().trim());
      final long[] range = HtmlCore.getPageRange(slotId, pages, slotpages);

      long page = range[0];
      while (page < range[1]) {
        key.set(page);
        Text pageId = new Text(Long.toString(page));
        output.collect(key, pageId);
        reporter.incrCounter(HiBench.Counters.BYTES_DATA_GENERATED, 8 + pageId.getLength());
        page++;
      }
    }

示例#28

0

显示文件

文件： QseqInputFormat.java 项目： ridvandongelci/Hadoop-BAM

    /*
     * Walks the line once and records, for each delimiter-separated field, its
     * start offset in 'fieldPositions' and its byte length in 'fieldLengths'.
     * Scanning stops at the end of the line or after NUM_QSEQ_COLS fields.
     *
     * @exception FormatException Line doesn't have the expected number of fields.
     */
    private void setFieldPositionsAndLengths(Text line) {
      final int lineLength = line.getLength();
      int fieldno = 0; // the field index within the record

      // 'cursor' is the byte position within the record
      for (int cursor = 0; cursor < lineLength && fieldno < NUM_QSEQ_COLS; fieldno++) {
        int endpos = line.find(Delim, cursor); // the field's end position
        if (endpos < 0) {
          endpos = lineLength; // last field: runs to the end of the line
        }

        fieldPositions[fieldno] = cursor;
        fieldLengths[fieldno] = endpos - cursor;

        cursor = endpos + 1; // the next starting position is the current end + 1
      }

      if (fieldno == NUM_QSEQ_COLS) {
        return;
      }
      throw new FormatException(
          "found "
              + fieldno
              + " fields instead of 11 at "
              + makePositionMessage(this.pos - lineLength)
              + ". Line: "
              + line);
    }

示例#29

0

显示文件

文件： UDFToInteger.java 项目： chanakaudaya/platform

 /**
  * Convert from string to an integer. This is called for CAST(... AS INT)
  *
  * <p>The text's bytes are parsed in radix 10. A value that is not a well-formed number yields
  * {@code null} rather than 0 (MySQL returns 0 here; NULL was chosen as the more conservative
  * result).
  *
  * @param i The string value to convert
  * @return the reusable IntWritable holding the parsed value, or {@code null} if {@code i} is
  *     null or cannot be parsed
  */
 public IntWritable evaluate(Text i) {
   if (i == null) {
     return null;
   }
   try {
     intWritable.set(LazyInteger.parseInt(i.getBytes(), 0, i.getLength(), 10));
   } catch (NumberFormatException e) {
     return null;
   }
   return intWritable;
 }

示例#30

0

显示文件

文件： BulkIngestExample.java 项目： joshelser/accumulo

    /**
     * Writes every value of {@code key} as its own column: family "colf", qualifier
     * "col_" followed by the zero-padded, 7-digit running index of the value.
     *
     * <p>All entries of one call share a single timestamp taken from the local clock.
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context output)
        throws IOException, InterruptedException {
      // be careful with the timestamp... if you run on a cluster
      // where the time is whacked you may not see your updates in
      // accumulo if there is already an existing value with a later
      // timestamp in accumulo... so make sure ntp is running on the
      // cluster or consider using logical time... one option is
      // to let accumulo set the time
      final long timestamp = System.currentTimeMillis();

      int index = 0;
      for (Text value : values) {
        final Text qualifier = new Text(String.format("col_%07d", index++));
        final Key outputKey = new Key(key, new Text("colf"), qualifier, timestamp);

        // Copy only the valid prefix of the Text's array into the Value.
        final Value outputValue = new Value(value.getBytes(), 0, value.getLength());
        output.write(outputKey, outputValue);
      }
    }