Esempio n. 1
0
  /**
   * Gathers HDFS, HBase and current-user tokens into one {@link Credentials} object, serializes
   * it, and installs the result on the given container launch context.
   *
   * @param amContainer launch context that receives the serialized tokens
   * @param paths paths passed to {@code TokenCache.obtainTokensForNamenodes}
   * @param conf configuration handed to the token providers
   * @throws IOException if obtaining or serializing the tokens fails
   */
  public static void setTokensFor(
      ContainerLaunchContext amContainer, List<Path> paths, Configuration conf) throws IOException {
    final Credentials creds = new Credentials();

    // HDFS: tokens for the namenodes behind the supplied paths.
    final Path[] pathArray = paths.toArray(new Path[0]);
    TokenCache.obtainTokensForNamenodes(creds, pathArray, conf);

    // HBase.
    obtainTokenForHBase(creds, conf);

    // Tokens the current user already holds, keyed by their identifier bytes.
    final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    for (Token<? extends TokenIdentifier> userToken : currentUser.getTokens()) {
      final Text tokenId = new Text(userToken.getIdentifier());
      LOG.info("Adding user token " + tokenId + " with " + userToken);
      creds.addToken(tokenId, userToken);
    }

    try (DataOutputBuffer serialized = new DataOutputBuffer()) {
      creds.writeTokenStorageToStream(serialized);
      final int writtenLength = serialized.getLength();

      if (LOG.isDebugEnabled()) {
        LOG.debug("Wrote tokens. Credentials buffer length: " + writtenLength);
      }

      // Bound the view by the written length; the backing array may be larger.
      amContainer.setTokens(ByteBuffer.wrap(serialized.getData(), 0, writtenLength));
    }
  }
  /**
   * Obtains fresh system tokens for {@code user}, schedules renewal for every managed token, and
   * publishes the serialized credentials for the application.
   *
   * @param applicationId application the tokens belong to
   * @param user user on whose behalf tokens are obtained
   * @param shouldCancelAtEnd forwarded to each {@code DelegationTokenToRenew}
   * @throws IOException if obtaining, renewing or serializing tokens fails
   * @throws InterruptedException if token acquisition is interrupted
   */
  private void requestNewHdfsDelegationToken(
      ApplicationId applicationId, String user, boolean shouldCancelAtEnd)
      throws IOException, InterruptedException {
    final Credentials freshCredentials = new Credentials();
    final Token<?>[] obtained = obtainSystemTokensForUser(user, freshCredentials);

    LOG.info(
        "Received new tokens for " + applicationId + ". Received " + obtained.length + " tokens.");

    // Only managed tokens are tracked for renewal.
    for (Token<?> candidate : obtained) {
      if (!candidate.isManaged()) {
        continue;
      }
      final DelegationTokenToRenew renewable =
          new DelegationTokenToRenew(
              applicationId, candidate, getConfig(), Time.now(), shouldCancelAtEnd, user);
      // Renew right away so the next expiration date is known before the timer is set.
      renewToken(renewable);
      setTimerForTokenRenewal(renewable);
      appTokens.get(applicationId).add(renewable);
      LOG.info("Received new token " + candidate);
    }

    final DataOutputBuffer serialized = new DataOutputBuffer();
    freshCredentials.writeTokenStorageToStream(serialized);
    final ByteBuffer tokenBytes = ByteBuffer.wrap(serialized.getData(), 0, serialized.getLength());
    rmContext.getSystemCredentialsForApps().put(applicationId, tokenBytes);
  }
  /**
   * Serializes the given {@link Credentials} and exposes the written bytes as a ByteBuffer.
   *
   * @param creds the credentials to serialize
   * @return a buffer wrapping exactly the bytes written for {@code creds}
   * @throws IOException if writing the token storage fails
   */
  public static ByteBuffer createTokenBuffer(Credentials creds) throws IOException {
    final DataOutputBuffer sink = new DataOutputBuffer();
    creds.writeTokenStorageToStream(sink);

    // The view is limited to the written length, not the backing array's length.
    final byte[] backing = sink.getData();
    return ByteBuffer.wrap(backing, 0, sink.getLength());
  }
Esempio n. 4
0
    /* Starts a call by writing its id and parameter to the remote server as a
     * single length-prefixed frame.
     * Note: invoked by caller threads, not by the Connection thread, hence the
     * synchronization on the output stream.
     */
    protected void sendParam(Call call) {
      if (shouldCloseConnection.get()) {
        return;
      }

      // In-memory staging buffer so the whole frame goes out in one write.
      final DataOutputBuffer frame = new DataOutputBuffer();
      try {
        if (LOG.isDebugEnabled()) {
          LOG.debug(getName() + " sending #" + call.id);
        }

        frame.writeInt(0xdeadbeef); // length placeholder, patched below
        frame.writeInt(call.id);
        call.param.write(frame);

        final int frameLength = frame.getLength();
        final byte[] frameBytes = frame.getData();
        // The length prefix excludes its own four bytes.
        Bytes.putInt(frameBytes, 0, frameLength - 4);

        //noinspection SynchronizeOnNonFinalField
        synchronized (this.out) { // FindBugs IS2_INCONSISTENT_SYNC
          out.write(frameBytes, 0, frameLength);
          out.flush();
        }
      } catch (IOException e) {
        markClosed(e);
      } finally {
        // Only an in-memory buffer, but release it promptly anyway.
        IOUtils.closeStream(frame);
      }
    }
  /**
   * Sends all ready edits to the loggers, waits for a write quorum, and then advances the
   * committed transaction id. Does nothing when no bytes are ready.
   *
   * @param durable not consulted by this implementation
   * @throws IOException if flushing the buffer or reaching quorum fails
   */
  @Override
  protected void flushAndSync(boolean durable) throws IOException {
    final int readyBytes = buf.countReadyBytes();
    if (readyBytes <= 0) {
      return;
    }

    final int readyTxns = buf.countReadyTxns();
    final long firstTxId = buf.getFirstReadyTxId();

    assert readyTxns > 0;

    // The ready bytes are copied into a fresh, exactly-sized array because
    // (1) the IPC layer cannot send a slice of a larger array, and
    // (2) the logger calls are asynchronous, so the data must not be mutated
    //     by later writes before it has been sent.
    final DataOutputBuffer snapshot = new DataOutputBuffer(readyBytes);
    buf.flushTo(snapshot);
    assert snapshot.getLength() == readyBytes;
    final byte[] payload = snapshot.getData();
    assert payload.length == snapshot.getLength();

    final QuorumCall<AsyncLogger, Void> pending =
        loggers.sendEdits(segmentTxId, firstTxId, readyTxns, payload);
    loggers.waitForWriteQuorum(pending, writeTimeoutMs, "sendEdits");

    // The batch is durable on a quorum: record the newest committed txid so later
    // RPCs inform loggers that have fallen behind.
    final long lastTxId = firstTxId + readyTxns - 1;
    loggers.setCommittedTxId(lastTxId);
  }
Esempio n. 6
0
 /**
  * Ensures the container launch context carries a timeline delegation token. Existing tokens
  * are read from the context; if none has the timeline kind and a token can be obtained, it is
  * added and the context's token buffer is replaced with the re-serialized credentials.
  *
  * @param clc launch context whose tokens are inspected and possibly replaced
  * @throws YarnException declared for the timeline token lookup
  * @throws IOException if reading or writing the token storage fails
  */
 private void addTimelineDelegationToken(ContainerLaunchContext clc)
     throws YarnException, IOException {
   final Credentials creds = new Credentials();
   final DataInputByteBuffer reader = new DataInputByteBuffer();
   final ByteBuffer existingTokens = clc.getTokens();
   if (existingTokens != null) {
     reader.reset(existingTokens);
     creds.readTokenStorageStream(reader);
     // Rewind so the caller's buffer can be read again.
     existingTokens.rewind();
   }

   // Nothing to do when a timeline delegation token is already present.
   for (org.apache.hadoop.security.token.Token<? extends TokenIdentifier> present :
       creds.getAllTokens()) {
     final boolean isTimelineToken =
         present.getKind().equals(TimelineDelegationTokenIdentifier.KIND_NAME);
     if (isTimelineToken) {
       return;
     }
   }

   final org.apache.hadoop.security.token.Token<TimelineDelegationTokenIdentifier> fetched =
       getTimelineDelegationToken();
   if (fetched == null) {
     return;
   }

   creds.addToken(timelineService, fetched);
   if (LOG.isDebugEnabled()) {
     LOG.debug("Add timline delegation token into credentials: " + fetched);
   }

   final DataOutputBuffer serialized = new DataOutputBuffer();
   creds.writeTokenStorageToStream(serialized);
   final ByteBuffer updatedTokens =
       ByteBuffer.wrap(serialized.getData(), 0, serialized.getLength());
   clc.setTokens(updatedTokens);
 }
 /**
  * Writes the job token into an in-memory buffer and returns a view of the written bytes.
  *
  * @param jobToken token to serialize
  * @return buffer wrapping exactly the serialized token bytes
  * @throws IOException if the token cannot be written
  */
 public static ByteBuffer serializeServiceData(Token<JobTokenIdentifier> jobToken)
     throws IOException {
   // TODO these bytes should be versioned
   final DataOutputBuffer sink = new DataOutputBuffer();
   jobToken.write(sink);
   final byte[] backing = sink.getData();
   final int written = sink.getLength();
   return ByteBuffer.wrap(backing, 0, written);
 }
 /**
  * Reads a vint length prefix from the bytes written to {@code dataBuffer} and points
  * {@code textOut} at that many bytes following the prefix.
  *
  * @param dataBuffer buffer holding the raw value
  * @param inputBuffer scratch input buffer; reset onto {@code dataBuffer}'s bytes
  * @param textOut destination that is set to the bytes after the prefix
  * @throws IOException if the length prefix cannot be read
  */
 private static void rawValueToTextBytes(
     DataOutputBuffer dataBuffer, DataInputBuffer inputBuffer, TextBytes textOut)
     throws IOException {
   final byte[] rawBytes = dataBuffer.getData();
   final int rawLength = dataBuffer.getLength();
   inputBuffer.reset(rawBytes, rawLength);

   // After reading the vint, the input position is at the first byte after the prefix.
   final int payloadLength = WritableUtils.readVInt(inputBuffer);
   textOut.set(inputBuffer.getData(), inputBuffer.getPosition(), payloadLength);
 }
Esempio n. 9
0
    /**
     * Reads the next record delimited by {@code startTag} and {@code endTag}.
     *
     * @param key set to the byte offset at which the record's start tag begins
     * @param value set to the record bytes, start tag through end tag inclusive
     * @return {@code true} if a complete record was read, {@code false} otherwise
     * @throws IOException if reading from the underlying stream fails
     */
    public boolean next(LongWritable key, Text value) throws IOException {
      if (pos >= end || !readUntilMatch(startTag, false)) {
        return false;
      }

      recordStartPos = pos - startTag.length;
      try {
        buffer.write(startTag);
        if (!readUntilMatch(endTag, true)) {
          return false;
        }
        key.set(recordStartPos);
        value.set(buffer.getData(), 0, buffer.getLength());
        return true;
      } finally {
        // Gzipped input streams are not seekable (no getPos), so consumed bytes are
        // tracked manually in pos. When the stream is seekable, cross-check that
        // count against the stream's own position. This is a temporary sanity check
        // that can be dropped once the bookkeeping is trusted.
        if (fsin instanceof Seekable) {
          final long streamPos = ((Seekable) fsin).getPos();
          if (pos != streamPos) {
            throw new RuntimeException("bytes consumed error!");
          }
        }

        buffer.reset();
      }
    }
  /**
   * Round-trips three simple records (the last with a null value) through the generated TSV
   * output and input classes.
   *
   * @throws Exception if test was failed
   */
  @SuppressWarnings("unchecked")
  @Test
  public void simple_record() throws Exception {
    ModelLoader loader = generate();

    Class<?> modelClass = loader.modelType("Simple");
    assertThat(modelClass.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(modelClass.isAnnotationPresent(ModelOutputLocation.class), is(true));

    // Write phase: emit three records as TSV into an in-memory buffer.
    ModelWrapper source = loader.newModel("Simple");
    DataOutputBuffer sink = new DataOutputBuffer();
    Class<?> outputClass = modelClass.getAnnotation(ModelOutputLocation.class).value();
    ModelOutput<Object> writer =
        (ModelOutput<Object>)
            outputClass
                .getDeclaredConstructor(RecordEmitter.class)
                .newInstance(new TsvEmitter(new OutputStreamWriter(sink, "UTF-8")));

    source.set("sid", 1L);
    source.set("value", new Text("hello"));
    writer.write(source.unwrap());

    source.set("sid", 2L);
    source.set("value", new Text("world"));
    writer.write(source.unwrap());

    source.set("sid", 3L);
    source.set("value", null);
    writer.write(source.unwrap());
    writer.close();

    // Read phase: parse the same bytes back and compare field by field.
    DataInputBuffer replay = new DataInputBuffer();
    replay.reset(sink.getData(), sink.getLength());
    Class<?> inputClass = modelClass.getAnnotation(ModelInputLocation.class).value();
    ModelInput<Object> reader =
        (ModelInput<Object>)
            inputClass
                .getDeclaredConstructor(RecordParser.class)
                .newInstance(new TsvParser(new InputStreamReader(replay, "UTF-8")));
    ModelWrapper restored = loader.newModel("Simple");

    reader.readTo(restored.unwrap());
    assertThat(restored.get("sid"), is((Object) 1L));
    assertThat(restored.get("value"), is((Object) new Text("hello")));

    reader.readTo(restored.unwrap());
    assertThat(restored.get("sid"), is((Object) 2L));
    assertThat(restored.get("value"), is((Object) new Text("world")));

    reader.readTo(restored.unwrap());
    assertThat(restored.get("sid"), is((Object) 3L));
    assertThat(restored.getOption("value").isNull(), is(true));

    // All bytes must have been consumed.
    assertThat(replay.read(), is(-1));
    reader.close();
  }
Esempio n. 11
0
 /**
  * Serializes a {@code Writable} into a new byte array sized to the written bytes.
  *
  * @param writable object to serialize
  * @return the serialized bytes
  * @throws AssertionError if the in-memory write raises an {@code IOException}
  */
 static byte[] write(Writable writable) {
   final DataOutputBuffer sink = new DataOutputBuffer();
   sink.reset();
   try {
     writable.write(sink);
   } catch (IOException e) {
     throw new AssertionError(e);
   }
   // Copy only the written prefix of the backing array.
   final byte[] backing = sink.getData();
   final byte[] result = new byte[sink.getLength()];
   System.arraycopy(backing, 0, result, 0, result.length);
   return result;
 }
Esempio n. 12
0
    /* Writes the per-connection preamble: the magic header bytes, the protocol
     * version, then the length-prefixed connection header.
     * The output stream is not synchronized here because only the first thread
     * performs this step.
     */
    private void writeHeader() throws IOException {
      out.write(HBaseServer.HEADER.array());
      out.write(HBaseServer.CURRENT_VERSION);

      // When there are more fields we can have ConnectionHeader Writable.
      final DataOutputBuffer staged = new DataOutputBuffer();
      header.write(staged);

      // Length first, then exactly that many bytes.
      final int headerLength = staged.getLength();
      final byte[] headerBytes = staged.getData();
      out.writeInt(headerLength);
      out.write(headerBytes, 0, headerLength);
    }
 /**
  * Serializes the region info into a byte array containing exactly the bytes written.
  *
  * <p>{@code DataOutputBuffer.getData()} exposes the whole backing array, which is usually
  * longer than the written content; only the first {@code getLength()} bytes are valid, so the
  * result is trimmed to that length instead of returning the raw backing array.
  *
  * @param hri region info to serialize
  * @return the serialized bytes, with no trailing padding
  * @throws IOException if writing the region info fails
  */
 @Deprecated
 private byte[] getBytes(HRegionInfo hri) throws IOException {
   DataOutputBuffer out = new DataOutputBuffer();
   try {
     hri.write(out);
     return java.util.Arrays.copyOf(out.getData(), out.getLength());
   } finally {
     out.close();
   }
 }
  /**
   * Round-trips a model populated with every primitive property type through the generated TSV
   * output and input classes.
   *
   * @throws Exception if test was failed
   */
  @SuppressWarnings("unchecked")
  @Test
  public void primitives() throws Exception {
    ModelLoader loader = generate();

    Class<?> modelClass = loader.modelType("Primitives");
    assertThat(modelClass.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(modelClass.isAnnotationPresent(ModelOutputLocation.class), is(true));

    ModelWrapper source = loader.newModel("Primitives");

    source.set("type_boolean", true);
    source.set("type_byte", (byte) 64);
    source.set("type_short", (short) 256);
    source.set("type_int", 100);
    source.set("type_long", 200L);
    source.set("type_float", 300.f);
    source.set("type_double", 400.d);
    source.set("type_decimal", new BigDecimal("1234.567"));
    source.set("type_text", new Text("Hello, world!"));
    source.set("type_date", new Date(2011, 3, 31));
    source.set("type_datetime", new DateTime(2011, 3, 31, 23, 30, 1));

    // Write the same record three times as TSV into an in-memory buffer.
    DataOutputBuffer sink = new DataOutputBuffer();
    Class<?> outputClass = modelClass.getAnnotation(ModelOutputLocation.class).value();
    ModelOutput<Object> writer =
        (ModelOutput<Object>)
            outputClass
                .getDeclaredConstructor(RecordEmitter.class)
                .newInstance(new TsvEmitter(new OutputStreamWriter(sink, "UTF-8")));
    for (int round = 0; round < 3; round++) {
      writer.write(source.unwrap());
    }
    writer.close();

    // Parse the first record back and compare it with the original.
    DataInputBuffer replay = new DataInputBuffer();
    replay.reset(sink.getData(), sink.getLength());
    Class<?> inputClass = modelClass.getAnnotation(ModelInputLocation.class).value();
    ModelInput<Object> reader =
        (ModelInput<Object>)
            inputClass
                .getDeclaredConstructor(RecordParser.class)
                .newInstance(new TsvParser(new InputStreamReader(replay, "UTF-8")));
    ModelWrapper restored = loader.newModel("Primitives");
    reader.readTo(restored.unwrap());
    assertThat(source.unwrap(), equalTo(restored.unwrap()));
    assertThat(replay.read(), is(-1));
    reader.close();
  }
Esempio n. 15
0
  /**
   * Serializes {@code tuple} with {@code ser}, deserializes the bytes again, and asserts the
   * result equals the original tuple.
   *
   * @param ser serialization under test
   * @param tuple tuple to round-trip
   * @param debug when {@code true}, prints the deserialized datum to standard output
   * @throws IOException if serialization or deserialization fails
   */
  protected static void assertSerializable(HadoopSerialization ser, ITuple tuple, boolean debug)
      throws IOException {
    final DataInputBuffer source = new DataInputBuffer();
    final DataOutputBuffer sink = new DataOutputBuffer();

    ser.ser(new DatumWrapper<ITuple>(tuple), sink);

    // Read back only the bytes that were actually written.
    source.reset(sink.getData(), 0, sink.getLength());
    final DatumWrapper<ITuple> roundTripped = ser.deser(new DatumWrapper<ITuple>(), source);

    if (debug) {
      System.out.println("D:" + roundTripped.datum());
    }
    assertEquals(tuple, roundTripped.datum());
  }
Esempio n. 16
0
    /**
     * Consumes bytes from the input until {@code match} has been seen, counting every read in
     * {@code pos}.
     *
     * @param match byte sequence to look for
     * @param withinBlock when {@code true}, every byte read is appended to {@code buffer}; when
     *     {@code false}, scanning stops once {@code pos} reaches {@code end} with no partial match
     * @return {@code true} if the sequence was matched, {@code false} on end of stream or when the
     *     stop point was passed
     * @throws IOException if reading from the stream fails
     */
    private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
      int matched = 0;
      for (;;) {
        final int current = fsin.read();
        // Count the read even when it signals end of stream.
        pos++;

        if (current == -1) {
          return false;
        }
        if (withinBlock) {
          buffer.write(current);
        }

        // Advance the match cursor, or start over on a mismatch.
        if (current != match[matched]) {
          matched = 0;
        } else if (++matched >= match.length) {
          return true;
        }

        // Outside a block, give up once the stop point has been passed.
        if (!withinBlock && matched == 0 && pos >= end) {
          return false;
        }
      }
    }
 /**
  * Reads the next raw record from the SequenceFile and records its position and sizes.
  *
  * @return {@code true} while more records remain; {@code false} once end of file is hit, or
  *     the record started at or past {@code end} and a sync marker was seen
  */
 public synchronized boolean nextKeyValue() throws IOException, InterruptedException {
   if (done) {
     return false;
   }

   final long startPos = in.getPosition();
   key.set(startPos);
   info.setPosition(startPos);

   final boolean reachedEof = in.nextRawKey(buffer) == -1;
   if (!reachedEof) {
     in.nextRawValue(vbytes);
     final int keyLength = buffer.getLength();
     final int valueLength = vbytes.getSize();
     final int recordLength = (int) (in.getPosition() - startPos);
     value.set(keyLength, valueLength, recordLength, in.syncSeen());
   }
   buffer.reset();

   done = reachedEof || (startPos >= end && in.syncSeen());
   return !done;
 }
    /**
     * Consumes bytes from the input until {@code match} has been seen.
     *
     * @param match byte sequence to look for
     * @param withinBlock when {@code true}, every byte read is appended to {@code buffer}; when
     *     {@code false}, scanning stops once the stream position reaches {@code end} with no
     *     partial match
     * @return {@code true} if the sequence was matched, {@code false} on end of stream or when the
     *     stop point was passed
     * @throws IOException if reading from the stream fails
     */
    private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
      int matched = 0;
      for (;;) {
        final int current = fsin.read();
        if (current == -1) {
          return false;
        }
        if (withinBlock) {
          buffer.write(current);
        }

        // Advance the match cursor, or start over on a mismatch.
        if (current != match[matched]) {
          matched = 0;
        } else if (++matched >= match.length) {
          return true;
        }

        // Outside a block, give up once the stop point has been passed.
        if (!withinBlock && matched == 0 && fsin.getPos() >= end) {
          return false;
        }
      }
    }
Esempio n. 19
0
  /**
   * Used by child copy constructors: copies {@code other} into this instance by serializing it
   * to an in-memory buffer and reading the bytes back through {@code readFields}.
   *
   * @param other the source to copy; must not be {@code null}
   * @throws IllegalArgumentException if {@code other} is {@code null}, or if the round-trip
   *     fails with an {@code IOException} (which is attached as the cause)
   */
  protected synchronized void copy(Writable other) {
    if (other == null) {
      throw new IllegalArgumentException("source map cannot be null");
    }

    try {
      DataOutputBuffer out = new DataOutputBuffer();
      other.write(out);
      DataInputBuffer in = new DataInputBuffer();
      // Only the first getLength() bytes of the backing array are valid.
      in.reset(out.getData(), out.getLength());
      readFields(in);
    } catch (IOException e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new IllegalArgumentException("map cannot be copied: " + e.getMessage(), e);
    }
  }
Esempio n. 20
0
 /** Verifies that every {@code ReplicaState} constant survives a write/read round trip. */
 @Test
 public void testReadWriteReplicaState() {
   try {
     final DataOutputBuffer sink = new DataOutputBuffer();
     final DataInputBuffer source = new DataInputBuffer();
     final HdfsServerConstants.ReplicaState[] states =
         HdfsServerConstants.ReplicaState.values();
     for (int idx = 0; idx < states.length; idx++) {
       final HdfsServerConstants.ReplicaState expected = states[idx];
       expected.write(sink);
       source.reset(sink.getData(), sink.getLength());
       final HdfsServerConstants.ReplicaState actual =
           HdfsServerConstants.ReplicaState.read(source);
       assertTrue("testReadWrite error !!!", expected == actual);
       // Reuse both buffers for the next constant.
       sink.reset();
       source.reset();
     }
   } catch (Exception ex) {
     fail("testReadWrite ex error ReplicaState");
   }
 }
Esempio n. 21
0
 /**
  * Advances to the next record delimited by {@code startTag} and {@code endTag}.
  *
  * <p>On success the key is set to the stream position after the end tag and the value to the
  * record bytes, start tag through end tag inclusive. The scratch buffer is always cleared once
  * a start tag has been found.
  *
  * @return {@code true} if a complete record was read, {@code false} otherwise
  */
 @Override
 public boolean nextKeyValue() throws IOException, InterruptedException {
   if (fsin.getPos() >= end || !readUntilMatch(startTag, false)) {
     return false;
   }

   try {
     buffer.write(startTag);
     if (!readUntilMatch(endTag, true)) {
       return false;
     }
     key.set(fsin.getPos());
     value.set(buffer.getData(), 0, buffer.getLength());
     return true;
   } finally {
     buffer.reset();
   }
 }
Esempio n. 22
0
  /**
   * Serializes {@code tuple} with {@code ser}, deserializes the bytes with {@code deser}, and
   * asserts that the resulting datum equals the original one.
   *
   * @param ser serializer under test
   * @param deser deserializer under test
   * @param tuple wrapped tuple to round-trip
   * @param debug when {@code true}, prints the deserialized datum to standard output
   * @throws IOException if serialization or deserialization fails
   */
  protected static void assertSerializable(
      TupleSerializer ser, TupleDeserializer deser, DatumWrapper<ITuple> tuple, boolean debug)
      throws IOException {
    // Serialize into an in-memory buffer.
    final DataOutputBuffer sink = new DataOutputBuffer();
    ser.open(sink);
    ser.serialize(tuple);
    ser.close();

    // Replay exactly the written bytes through the deserializer.
    final DataInputBuffer source = new DataInputBuffer();
    source.reset(sink.getData(), 0, sink.getLength());
    final DatumWrapper<ITuple> holder = new DatumWrapper<ITuple>();

    deser.open(source);
    final DatumWrapper<ITuple> roundTripped = deser.deserialize(holder);
    deser.close();

    if (debug) {
      System.out.println("D:" + roundTripped.datum());
    }
    assertEquals(tuple.datum(), roundTripped.datum());
  }
 /**
  * Reads the next title/text pair from the stream.
  *
  * @param key set to the bytes between the title markers
  * @param value set to the XML-unescaped text content, encoded with {@code UTF8}
  * @return {@code true} if both a title and a text section were read, {@code false} otherwise
  * @throws IOException if reading from the stream fails
  */
 protected boolean next(Text key, Text value) throws IOException {
   if (fsin.getPos() >= end) {
     return false;
   }

   try {
     // Title: skip to the start marker, then buffer up to and including the end marker.
     if (!readUntilMatch(START_TITLE_MARKER, false) || !readUntilMatch(END_TITLE_MARKER, true)) {
       return false;
     }
     final int titleStop = buffer.getLength() - END_TITLE_MARKER.length;
     key.set(buffer.getData(), 0, titleStop);
     buffer.reset();

     // Text: same procedure with the text markers.
     if (!readUntilMatch(START_TEXT_MARKER, false) || !readUntilMatch(END_TEXT_MARKER, true)) {
       return false;
     }

     // Un-escape the XML entities and re-encode the result as raw UTF8 bytes.
     // NOTE(review): the length below is derived from END_TITLE_MARKER (not
     // END_TEXT_MARKER) and then extended by one. This looks like it relies on the text end
     // marker being one byte shorter than the title end marker; confirm against the marker
     // definitions.
     final int textStop = buffer.getLength() - END_TITLE_MARKER.length;
     final String escaped = new String(buffer.getData(), 0, textStop + 1, UTF8);
     final String unescaped = StringEscapeUtils.unescapeXml(escaped);
     value.set(unescaped.getBytes(UTF8));
     return true;
   } finally {
     buffer.reset();
   }
 }
Esempio n. 24
0
  /**
   * Serializes the current user's credentials, minus any AM->RM token, for hand-off to
   * containers.
   *
   * <p>The AM->RM token is removed <em>before</em> the credentials are written. Serializing
   * first would leave the token in the returned bytes, because removing it from the
   * {@code Credentials} object afterwards cannot change what was already written.
   *
   * @return a buffer wrapping exactly the serialized credentials
   * @throws IOException if the credentials cannot be obtained or serialized
   */
  private ByteBuffer getSecurityTokens() throws IOException {
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    Closer closer = Closer.create();
    try {
      // Remove the AM->RM token so that containers cannot access it
      Iterator<Token<?>> tokenIterator = credentials.getAllTokens().iterator();
      while (tokenIterator.hasNext()) {
        Token<?> token = tokenIterator.next();
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
          tokenIterator.remove();
        }
      }

      // Serialize only after filtering so the removed token never reaches the buffer.
      DataOutputBuffer dataOutputBuffer = closer.register(new DataOutputBuffer());
      credentials.writeTokenStorageToStream(dataOutputBuffer);

      return ByteBuffer.wrap(dataOutputBuffer.getData(), 0, dataOutputBuffer.getLength());
    } catch (Throwable t) {
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }
  /**
   * Checks that the payload produced by {@code getBytePayload} for an empty routing table
   * deserializes into an edge configuration with the configured number of buckets.
   */
  @Test(timeout = 5000)
  public void testGetBytePayload() throws IOException {
    int numBuckets = 10;
    VertexManagerPluginContext context = mock(VertexManagerPluginContext.class);
    CustomVertexConfiguration vertexConf =
        new CustomVertexConfiguration(numBuckets, TezWork.VertexType.INITIALIZED_EDGES);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    // Bound the payload by the written length: getData() exposes the whole backing array, and
    // bytes past getLength() are not part of the serialized configuration.
    UserPayload payload =
        UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
    when(context.getUserPayload()).thenReturn(payload);

    CustomPartitionVertex vm = new CustomPartitionVertex(context);
    vm.initialize();

    // prepare empty routing table
    Multimap<Integer, Integer> routingTable = HashMultimap.<Integer, Integer>create();
    payload = vm.getBytePayload(routingTable);
    // get conf from user payload
    CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration();
    DataInputByteBuffer dibb = new DataInputByteBuffer();
    dibb.reset(payload.getPayload());
    edgeConf.readFields(dibb);
    assertEquals(numBuckets, edgeConf.getNumBuckets());
  }
Esempio n. 26
0
  /**
   * Serializes both tuples and compares them through the comparator's raw-bytes entry point.
   *
   * @param comp comparator under test
   * @param tuple1 left-hand tuple
   * @param tuple2 right-hand tuple
   * @return the result of {@code comp.compare} on the serialized forms
   * @throws IOException if serialization fails
   */
  private int compareInBinary1(SortComparator comp, ITuple tuple1, ITuple tuple2)
      throws IOException {
    final DataOutputBuffer left = new DataOutputBuffer();
    ser.ser(new DatumWrapper<ITuple>(tuple1), left);

    final DataOutputBuffer right = new DataOutputBuffer();
    ser.ser(new DatumWrapper<ITuple>(tuple2), right);

    // Each side is described by (backing array, offset 0, written length).
    final byte[] leftBytes = left.getData();
    final int leftLength = left.getLength();
    final byte[] rightBytes = right.getData();
    final int rightLength = right.getLength();
    return comp.compare(leftBytes, 0, leftLength, rightBytes, 0, rightLength);
  }
Esempio n. 27
0
 /**
  * Returns the next byte of the text rendering of the underlying records.
  *
  * <p>Each record is rendered as {@code key<TAB>value<NEWLINE>}. When the current rendering is
  * used up, the next record is fetched and rendered.
  *
  * @return the next byte, or {@code -1} when there are no more records
  * @throws IOException if reading a record fails
  */
 public int read() throws IOException {
   // Serve from the current rendering while it still has bytes.
   if (inbuf != null) {
     final int pending = inbuf.read();
     if (pending != -1) {
       return pending;
     }
   }

   if (!r.next(key, val)) {
     return -1;
   }

   // Render "key \t value \n" into the output buffer, then replay it as input.
   final byte[] keyBytes = key.toString().getBytes();
   outbuf.write(keyBytes, 0, keyBytes.length);
   outbuf.write('\t');
   final byte[] valueBytes = val.toString().getBytes();
   outbuf.write(valueBytes, 0, valueBytes.length);
   outbuf.write('\n');
   inbuf.reset(outbuf.getData(), outbuf.getLength());
   outbuf.reset();
   return inbuf.read();
 }
Esempio n. 28
0
  @Override
  // Reads the page header byte by byte and fills `meta` with the page title,
  // namespace and page id. Returns as soon as the next <revision> tag has been
  // passed (Ack.PASSED_TO_NEXT_TAG), the page is skipped (Ack.SKIPPED), no more
  // input can be fetched (Ack.EOF), or an inconsistent state is seen (Ack.FAILED).
  //
  // Parser states held in `flag` (this method starts in state 2):
  // -1: EOF
  // 1 - outside the <page> tag
  // 2 - just passed the <page> tag but outside the <title>
  // 3 - just passed the <title> tag
  // 4 - just passed the </title> tag but outside the <namespace>
  // 5 - just passed the <namespace>
  // 6 - just passed the </namespace> but outside the <id>
  // 7 - just passed the (page's) <id>
  // 8 - just passed the </id> tag but outside the <revision>
  // 9 - (optionally) just passed the <redirect>
  // 10 - just passed the (next) <revision>
  //
  // `i` is the number of bytes of the currently expected tag matched so far.
  // Integer.parseInt / Long.parseLong below throw NumberFormatException when the
  // namespace or id content is not numeric.
  protected Ack readToPageHeader(RevisionHeader meta) throws IOException {
    int i = 0;
    int flag = 2;
    boolean skipped = false;
    // Which of the two candidate tags is being matched in state 8:
    // -1 none yet, 1 START_REVISION, 2 START_REDIRECT, 3 both still possible.
    int revOrRedirect = -1;
    try (DataOutputBuffer pageTitle = new DataOutputBuffer();
        DataOutputBuffer nsBuf = new DataOutputBuffer();
        DataOutputBuffer keyBuf = new DataOutputBuffer()) {

      while (true) {
        if (!fetchMore()) return Ack.EOF;
        while (hasData()) {
          byte b = nextByte();

          // when passing the namespace and we realize that
          // this is not an article, and that the option of skipping
          // non-article pages is on, we simply skip everything until
          // the closing </page>
          // NOTE(review): `skipped` is a local that is only set to true immediately
          // before a `return`, so this branch looks unreachable within one call.
          if (skipped) {
            if (flag >= 6) {
              Log.warn("Peculiar read after skipping namespace");
              /*
              if (b == END_PAGE[i]) {
                i++;
              } else i = 0;
              if (i >= END_PAGE.length) {
                return Ack.SKIPPED;
              } */
              return Ack.FAILED;
            } else return Ack.SKIPPED;
          }

          // State 2: scan for the opening title tag.
          if (flag == 2) {
            if (b == START_TITLE[i]) {
              i++;
            } else i = 0;
            if (i >= START_TITLE.length) {
              flag = 3;
              i = 0;
            }
          }

          // put everything between <title></title> block into title
          else if (flag == 3) {
            if (b == END_TITLE[i]) {
              i++;
            } else i = 0;
            pageTitle.write(b);
            if (i >= END_TITLE.length) {
              flag = 4;
              // The buffer also holds the closing tag; cut it off. The bytes are
              // decoded with the JVM's default charset.
              String title =
                  new String(pageTitle.getData(), 0, pageTitle.getLength() - END_TITLE.length);
              meta.setPageTitle(title);
              pageTitle.reset();
              i = 0;
            }
          } else if (flag == 4) {
            // State 4: scan for the opening namespace tag.
            if (b == START_NAMESPACE[i]) {
              i++;
            } else i = 0;
            if (i >= START_NAMESPACE.length) {
              flag = 5;
              i = 0;
            }
          } else if (flag == 5) {
            // State 5: buffer the namespace content up to its closing tag.
            if (b == END_NAMESPACE[i]) {
              i++;
            } else i = 0;
            nsBuf.write(b);
            if (i >= END_NAMESPACE.length) {
              flag = 6;
              String nsStr =
                  new String(nsBuf.getData(), 0, nsBuf.getLength() - END_NAMESPACE.length);
              int ns = Integer.parseInt(nsStr);
              nsBuf.reset();
              // A non-zero namespace is skipped when skipNonArticles is set.
              if (ns != 0) {
                if (skipNonArticles) {
                  skipped = true;
                  meta.clear();
                  return Ack.SKIPPED;
                }
              }
              meta.setNamespace(ns);
              i = 0;
            }
          } else if (flag == 6) {
            // State 6: scan for the opening id tag.
            if (b == START_ID[i]) {
              i++;
            } else i = 0;
            if (i >= START_ID.length) {
              flag = 7;
              i = 0;
            }
          }

          // put everything in outer <id></id> block into keyBuf
          else if (flag == 7) {
            if (b == END_ID[i]) {
              i++;
            } else i = 0;
            keyBuf.write(b);
            if (i >= END_ID.length) {
              flag = 8;
              String idStr = new String(keyBuf.getData(), 0, keyBuf.getLength() - END_ID.length);
              long pageId = Long.parseLong(idStr);
              meta.setPageId(pageId);
              i = 0;
            }
          } else if (flag == 8) {
            // State 8: match START_REVISION and START_REDIRECT in parallel.
            // curMatch says which tag(s) the current byte continues:
            // 3 both, 1 revision only, 2 redirect only, 0 neither.
            int curMatch = 0;
            if ((i < START_REVISION.length && b == START_REVISION[i])
                && (i < START_REDIRECT.length && b == START_REDIRECT[i])

                // subtle bug here: some tag names can overlap
                // multiple times
                && (revOrRedirect == 3 || revOrRedirect == -1)) {
              curMatch = 3;
            } else if (i < START_REVISION.length && b == START_REVISION[i] && revOrRedirect != 2) {
              curMatch = 1;
            } else if (i < START_REDIRECT.length && b == START_REDIRECT[i] && revOrRedirect != 1) {
              curMatch = 2;
            } else {
              curMatch = 0;
            }
            // Extend the match only when the byte is consistent with the candidate
            // tracked so far; otherwise restart matching from the first tag byte.
            if (curMatch > 0 && (i == 0 || revOrRedirect == 3 || curMatch == revOrRedirect)) {
              i++;
              revOrRedirect = curMatch;
            } else i = 0;
            if ((revOrRedirect == 2 || revOrRedirect == 3) && i >= START_REDIRECT.length) {
              // A complete redirect tag: skip the page if configured, else go on to
              // look for the revision tag in state 9.
              if (skipRedirect) {
                skipped = true;
                meta.clear();
                return Ack.SKIPPED;
              }
              revOrRedirect = -1;
              flag = 9;
              i = 0;
            } else if ((revOrRedirect == 1 || revOrRedirect == 3) && i >= START_REVISION.length) {
              flag = 10;
              revOrRedirect = -1;
              return Ack.PASSED_TO_NEXT_TAG;
            }
          } else if (flag == 9 && !skipRedirect) {
            // State 9: after a redirect tag, scan for the opening revision tag.
            if (b == START_REVISION[i]) {
              i++;
            } else i = 0;
            if (i >= START_REVISION.length) {
              flag = 10;
              return Ack.PASSED_TO_NEXT_TAG;
            }
          }
        }
      }
    }
  }
Esempio n. 29
0
  /*
   * Helper function to create an edge property from an edge type.
   *
   * Broadcast, custom-simple and simple edges only pick a data movement type and the
   * input/output classes; the EdgeProperty is then built at the end. A custom edge builds its
   * EdgeProperty directly from an EdgeManagerDescriptor carrying the serialized
   * CustomEdgeConfiguration as user payload.
   *
   * The payload is trimmed to the bytes actually written: DataOutputBuffer.getData() returns
   * the whole backing array, of which only the first getLength() bytes are valid.
   */
  private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp) throws IOException {
    DataMovementType dataMovementType;
    Class logicalInputClass;
    Class logicalOutputClass;

    EdgeProperty edgeProperty = null;
    EdgeType edgeType = edgeProp.getEdgeType();
    switch (edgeType) {
      case BROADCAST_EDGE:
        dataMovementType = DataMovementType.BROADCAST;
        logicalOutputClass = OnFileUnorderedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        break;

      case CUSTOM_EDGE:
        // Assigned for definite assignment only; this branch builds its EdgeProperty from the
        // edge manager descriptor below.
        dataMovementType = DataMovementType.CUSTOM;
        logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        EdgeManagerDescriptor edgeDesc =
            new EdgeManagerDescriptor(CustomPartitionEdge.class.getName());
        CustomEdgeConfiguration edgeConf =
            new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
        DataOutputBuffer dob = new DataOutputBuffer();
        edgeConf.write(dob);
        // Copy only the written bytes so the payload carries no buffer padding.
        byte[] userPayload = java.util.Arrays.copyOf(dob.getData(), dob.getLength());
        edgeDesc.setUserPayload(userPayload);
        edgeProperty =
            new EdgeProperty(
                edgeDesc,
                DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                new OutputDescriptor(logicalOutputClass.getName()),
                new InputDescriptor(logicalInputClass.getName()));
        break;

      case CUSTOM_SIMPLE_EDGE:
        dataMovementType = DataMovementType.SCATTER_GATHER;
        logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        break;

      case SIMPLE_EDGE:
      default:
        dataMovementType = DataMovementType.SCATTER_GATHER;
        logicalOutputClass = OnFileSortedOutput.class;
        logicalInputClass = ShuffledMergedInputLegacy.class;
        break;
    }

    // Every edge type except the custom edge reaches this point without an EdgeProperty.
    if (edgeProperty == null) {
      edgeProperty =
          new EdgeProperty(
              dataMovementType,
              DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL,
              new OutputDescriptor(logicalOutputClass.getName()),
              new InputDescriptor(logicalInputClass.getName()));
    }

    return edgeProperty;
  }
Esempio n. 30
0
  /**
   * Creates between 2 and 9 in-memory segments, each filled with freshly generated key/value
   * data. Every pair written is also recorded in {@code originalData}.
   *
   * @return the list of created segments
   * @throws IOException if serializing or writing the data fails
   */
  public List<TezMerger.Segment> createInMemStreams() throws IOException {
    final int streamCount = Math.max(2, rnd.nextInt(10));
    LOG.info("No of streams : " + streamCount);

    SerializationFactory factory = new SerializationFactory(conf);
    Serializer keySer = factory.getSerializer(keyClass);
    Serializer valSer = factory.getSerializer(valClass);

    LocalDirAllocator dirAllocator =
        new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext inputContext = createTezInputContext();
    MergeManager manager =
        new MergeManager(
            conf,
            fs,
            dirAllocator,
            inputContext,
            null,
            null,
            null,
            null,
            null,
            1024 * 1024 * 10,
            null,
            false,
            -1);

    // Scratch buffers shared by all streams: serializers write into the output
    // buffers, and the input buffers replay those bytes to the segment writer.
    DataOutputBuffer keyOut = new DataOutputBuffer();
    DataOutputBuffer valOut = new DataOutputBuffer();
    DataInputBuffer keyReplay = new DataInputBuffer();
    DataInputBuffer valReplay = new DataInputBuffer();
    keySer.open(keyOut);
    valSer.open(valOut);

    List<TezMerger.Segment> result = new LinkedList<TezMerger.Segment>();
    for (int stream = 0; stream < streamCount; stream++) {
      BoundedByteArrayOutputStream backing = new BoundedByteArrayOutputStream(1024 * 1024);
      InMemoryWriter segmentWriter = new InMemoryWriter(backing);
      Map<Writable, Writable> generated = createData();

      // write data
      for (Map.Entry<Writable, Writable> pair : generated.entrySet()) {
        final Writable k = pair.getKey();
        final Writable v = pair.getValue();
        keySer.serialize(k);
        valSer.serialize(v);
        keyReplay.reset(keyOut.getData(), 0, keyOut.getLength());
        valReplay.reset(valOut.getData(), 0, valOut.getLength());
        segmentWriter.append(keyReplay, valReplay);
        originalData.put(k, v);
        // Clear the scratch buffers for the next pair.
        keyOut.reset();
        valOut.reset();
        keyReplay.reset();
        valReplay.reset();
      }

      final byte[] segmentBytes = backing.getBuffer();
      IFile.Reader segmentReader =
          new InMemoryReader(manager, null, segmentBytes, 0, backing.getBuffer().length);
      result.add(new TezMerger.Segment(segmentReader, true));

      generated.clear();
      segmentWriter.close();
    }
    return result;
  }