Exemplo n.º 1
0
  @Override
  public void onTrigger(final ProcessContext context, final ProcessSession session)
      throws ProcessException {

    FlowFile flowFile = null;
    if (context.hasIncomingConnection()) {
      flowFile = session.get();

      // If we have no FlowFile, and all incoming connections are self-loops then we can continue
      // on.
      // However, if we have no FlowFile and we have connections coming from other Processors, then
      // we know that we should run only if we have a FlowFile.
      if (flowFile == null && context.hasNonLoopConnection()) {
        return;
      }
    }

    OkHttpClient okHttpClient = getClient();

    if (flowFile == null) {
      flowFile = session.create();
    }

    final String index =
        context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId =
        context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType =
        context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final String fields =
        context.getProperty(FIELDS).isSet()
            ? context.getProperty(FIELDS).evaluateAttributeExpressions(flowFile).getValue()
            : null;

    // Authentication
    final String username =
        context.getProperty(USERNAME).evaluateAttributeExpressions(flowFile).getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions().getValue();

    final ComponentLog logger = getLogger();

    Response getResponse = null;

    try {
      logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] {index, docType, docId});

      // read the url property from the context
      final String urlstr =
          StringUtils.trimToEmpty(
              context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
      final URL url = buildRequestURL(urlstr, docId, index, docType, fields);
      final long startNanos = System.nanoTime();

      getResponse = sendRequestToElasticsearch(okHttpClient, url, username, password, "GET", null);
      final int statusCode = getResponse.code();

      if (isSuccess(statusCode)) {
        ResponseBody body = getResponse.body();
        final byte[] bodyBytes = body.bytes();
        JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
        boolean found = responseJson.get("found").asBoolean(false);
        String retrievedIndex = responseJson.get("_index").asText();
        String retrievedType = responseJson.get("_type").asText();
        String retrievedId = responseJson.get("_id").asText();

        if (found) {
          JsonNode source = responseJson.get("_source");
          flowFile = session.putAttribute(flowFile, "filename", retrievedId);
          flowFile = session.putAttribute(flowFile, "es.index", retrievedIndex);
          flowFile = session.putAttribute(flowFile, "es.type", retrievedType);
          if (source != null) {
            flowFile =
                session.write(
                    flowFile,
                    out -> {
                      out.write(source.toString().getBytes());
                    });
          }
          logger.debug("Elasticsearch document " + retrievedId + " fetched, routing to success");

          // emit provenance event
          final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
          if (context.hasNonLoopConnection()) {
            session.getProvenanceReporter().fetch(flowFile, url.toExternalForm(), millis);
          } else {
            session.getProvenanceReporter().receive(flowFile, url.toExternalForm(), millis);
          }
          session.transfer(flowFile, REL_SUCCESS);
        } else {
          logger.warn(
              "Failed to read {}/{}/{} from Elasticsearch: Document not found",
              new Object[] {index, docType, docId});

          // We couldn't find the document, so send it to "not found"
          session.transfer(flowFile, REL_NOT_FOUND);
        }
      } else {
        if (statusCode == 404) {
          logger.warn(
              "Failed to read {}/{}/{} from Elasticsearch: Document not found",
              new Object[] {index, docType, docId});

          // We couldn't find the document, so penalize it and send it to "not found"
          session.transfer(flowFile, REL_NOT_FOUND);
        } else {
          // 5xx -> RETRY, but a server error might last a while, so yield
          if (statusCode / 100 == 5) {

            logger.warn(
                "Elasticsearch returned code {} with message {}, transferring flow file to retry. This is likely a server problem, yielding...",
                new Object[] {statusCode, getResponse.message()});
            session.transfer(flowFile, REL_RETRY);
            context.yield();
          } else if (context.hasIncomingConnection()) { // 1xx, 3xx, 4xx -> NO RETRY
            logger.warn(
                "Elasticsearch returned code {} with message {}, transferring flow file to failure",
                new Object[] {statusCode, getResponse.message()});
            session.transfer(flowFile, REL_FAILURE);
          } else {
            logger.warn(
                "Elasticsearch returned code {} with message {}",
                new Object[] {statusCode, getResponse.message()});
            session.remove(flowFile);
          }
        }
      }
    } catch (IOException ioe) {
      logger.error(
          "Failed to read from Elasticsearch due to {}, this may indicate an error in configuration "
              + "(hosts, username/password, etc.). Routing to retry",
          new Object[] {ioe.getLocalizedMessage()},
          ioe);
      if (context.hasIncomingConnection()) {
        session.transfer(flowFile, REL_RETRY);
      } else {
        session.remove(flowFile);
      }
      context.yield();

    } catch (Exception e) {
      logger.error(
          "Failed to read {} from Elasticsearch due to {}",
          new Object[] {flowFile, e.getLocalizedMessage()},
          e);
      if (context.hasIncomingConnection()) {
        session.transfer(flowFile, REL_FAILURE);
      } else {
        session.remove(flowFile);
      }
      context.yield();
    } finally {
      if (getResponse != null) {
        getResponse.close();
      }
    }
  }