private void doSetupMockRequest(
      final String contentType, final ByteArrayEntity entity, final int status) throws IOException {
    final Header contentTypeHeader = new BasicHeader("Content-Type", contentType);
    final Header[] linkHeaders = new Header[] {};

    when(mockHttpclient.execute(any(HttpUriRequest.class))).thenReturn(mockResponse);
    when(mockResponse.getFirstHeader("Location")).thenReturn(null);
    when(mockResponse.getFirstHeader("Content-Type")).thenReturn(contentTypeHeader);
    when(mockResponse.getHeaders("Link")).thenReturn(linkHeaders);
    when(mockResponse.getEntity()).thenReturn(entity);
    when(mockResponse.getStatusLine()).thenReturn(mockStatus);
    when(mockStatus.getStatusCode()).thenReturn(status);
  }
示例#2
0
  private static void assert304NotModified(
      CloseableHttpResponse res, String expectedLastModified, String expectedContentType) {

    assertStatusLine(res, "HTTP/1.1 304 Not Modified");

    // Ensure that the 'Last-Modified' header did not change.
    assertThat(res.getFirstHeader(HttpHeaders.LAST_MODIFIED).getValue(), is(expectedLastModified));

    // Ensure that the content does not exist but its type does.
    assertThat(res.getEntity(), is(nullValue()));

    assertThat(res.containsHeader(HttpHeaders.CONTENT_TYPE), is(true));
    assertThat(
        res.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue(), startsWith(expectedContentType));
  }
示例#3
0
文件: Fetcher.java 项目: mpermana/pet
  public static Object fetch(DocumentSource source) {
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
      URIBuilder builder = new URIBuilder(source.url);
      for (Entry<String, String> it : source.parameters.entrySet()) {
        builder.addParameter(it.getKey(), it.getValue());
      }
      URI uri = builder.build();

      HttpUriRequest request = new HttpGet(uri);
      request.addHeader("Accept", "application/json");
      CloseableHttpResponse response = httpclient.execute(request);

      String headers = response.getFirstHeader("Content-Type").getValue();
      InputStream inputStream = response.getEntity().getContent();
      if (headers.contains("text/html")) {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        Document document = Jsoup.parse(inputStream, null, source.url);
        return document;
      } else if (headers.contains("json")) {
        ObjectMapper om = new ObjectMapper();
        return om.readValue(inputStream, HashMap.class);
      } else {
        IOUtils.copy(inputStream, System.err);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return null;
  }
示例#4
0
  public static void main(String[] args) throws ClientProtocolException, IOException {
    try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {

      // HttpClient httpClient = getTestHttpClient();
      // HttpHost proxy = new HttpHost("127.0.0.1", 8888);
      // httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,
      // proxy);

      String apiKey = args[0];
      String latitude = "46.947922";
      String longitude = "7.444608";

      String urlTemplate = "https://api.forecast.io/forecast/%s/%s,%s";
      String url = String.format(urlTemplate, apiKey, latitude, longitude);
      System.out.println(url);

      HttpGet httpget = new HttpGet(url);
      try (CloseableHttpResponse response = httpClient.execute(httpget)) {
        System.out.println(response.getFirstHeader("Content-Encoding"));
        HeaderIterator it = response.headerIterator();
        while (it.hasNext()) {
          Object header = it.next();
          System.out.println(header);
        }

        HttpEntity entity = response.getEntity();
        String jsonData = EntityUtils.toString(entity, StandardCharsets.UTF_8);

        System.out.println(jsonData);
      }
    }
  }
示例#5
0
 public InputStream getInputStream() throws Exception {
   httpClient = HttpClients.createDefault();
   HttpGet httpGet = new HttpGet(uri);
   System.out.println(uri);
   RequestConfig requestConfig =
       RequestConfig.custom().setSocketTimeout(30000).setConnectTimeout(30000).build();
   httpGet.setConfig(requestConfig);
   // DefaultHttpRequestRetryHandler handler = new DefaultHttpRequestRetryHandler(3, false);
   // httpClient.setHttpRequestRetryHandler(handler);
   response = httpClient.execute(httpGet);
   System.out.println(response.getStatusLine());
   if (HttpStatus.SC_OK != response.getStatusLine().getStatusCode()) {
     // System.out.println("Connect Error");
     return null;
   }
   if (key != null) {
     Header header = response.getFirstHeader(key);
     if (header == null) {
       // not valid date
       // System.out.println("Not valid date.");
       return null;
     }
   }
   entity = response.getEntity();
   return entity.getContent();
 }
示例#6
0
 public void getTPBShows() {
   CloseableHttpClient httpClient = HttpClientBuilder.create().build();
   CloseableHttpResponse response = null;
   try {
     HttpGet httpGet = new HttpGet(pageURL);
     httpGet.addHeader(
         "User-Agent",
         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36");
     response = httpClient.execute(httpGet);
     HttpEntity httpEntity = response.getEntity();
     Header contentType = response.getFirstHeader("Content-Type");
     String[] contentArray = contentType.getValue().split(";");
     String charset = "UTF-8";
     // String mimeType = contentArray[0].trim();
     if (contentArray.length > 1 && contentArray[1].contains("=")) {
       charset = contentArray[1].trim().split("=")[1];
     }
     Document pageDoc = Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
     Element results = pageDoc.getElementById("searchResult");
     response.close();
     Elements rawShowObjects = results.select("td.vertTh+td");
     TPBToTvShowEpisode makeShows = new TPBToTvShowEpisode();
     List<TvShowEpisode> theShows = makeShows.makeTSEBeans(rawShowObjects);
     DBActions.insertTvEpisodes(theShows, pageURL);
   } catch (MalformedURLException MURLe) {
     // Utilities.sendExceptionEmail(MURLe.getMessage());
     MURLe.printStackTrace();
   } catch (Exception e) {
     // Utilities.sendExceptionEmail(e.getMessage());
     e.printStackTrace();
   }
 }
示例#7
0
 @Test
 public void testDefaultHandlerFavicon() throws Exception {
   try (CloseableHttpClient hc = HttpClients.createMinimal()) {
     try (CloseableHttpResponse res = hc.execute(new HttpGet(uri("/default/favicon.ico")))) {
       assertThat(res.getStatusLine().toString(), is("HTTP/1.1 200 OK"));
       assertThat(
           res.getFirstHeader(HttpHeaderNames.CONTENT_TYPE.toString()).getValue(),
           startsWith("image/x-icon"));
       assertThat(EntityUtils.toByteArray(res.getEntity()).length, is(greaterThan(0)));
     }
   }
 }
示例#8
0
  private static String assert200Ok(
      CloseableHttpResponse res, String expectedContentType, String expectedContent)
      throws Exception {

    assertStatusLine(res, "HTTP/1.1 200 OK");

    // Ensure that the 'Last-Modified' header exists and is well-formed.
    final String lastModified;
    assertThat(res.containsHeader(HttpHeaders.LAST_MODIFIED), is(true));
    lastModified = res.getFirstHeader(HttpHeaders.LAST_MODIFIED).getValue();
    HttpHeaderDateFormat.get().parse(lastModified);

    // Ensure the content and its type are correct.
    assertThat(EntityUtils.toString(res.getEntity()), is(expectedContent));

    assertThat(res.containsHeader(HttpHeaders.CONTENT_TYPE), is(true));
    assertThat(
        res.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue(), startsWith(expectedContentType));

    return lastModified;
  }
示例#9
0
 public void getIPTShows() {
   CloseableHttpClient httpClient = HttpClientBuilder.create().build();
   CloseableHttpResponse response = null;
   String pageURL = "https://www.iptorrents.com";
   try {
     HttpGet httpGet = new HttpGet(pageURL);
     httpGet.addHeader(
         "User-Agent",
         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36");
     response = httpClient.execute(httpGet);
     response.removeHeaders("Transfer-Encoding");
     HttpPost thePost = new HttpPost(pageURL + "?username=mcpchelper81&password=ru68ce48&php=");
     thePost.setHeaders(response.getAllHeaders());
     response.close();
     response = null;
     response = httpClient.execute(thePost);
     httpGet = new HttpGet("https://www.iptorrents.com/t?5");
     httpGet.setHeaders(response.getHeaders("set-cookie"));
     httpGet.addHeader(
         "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
     httpGet.addHeader("accept-language", "en-US,en;q=0.8");
     httpGet.addHeader("dnt", "1");
     httpGet.addHeader("upgrade-insecure-requests", "1");
     httpGet.addHeader(
         "user-agent",
         "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");
     response.close();
     response = null;
     response = httpClient.execute(httpGet);
     Header contentType = response.getFirstHeader("Content-Type");
     HttpEntity httpEntity = response.getEntity();
     String[] contentArray = contentType.getValue().split(";");
     String charset = "UTF-8";
     if (contentArray.length > 1 && contentArray[1].contains("=")) {
       charset = contentArray[1].trim().split("=")[1];
     }
     Document pageDoc = Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
     Elements results = pageDoc.getElementsByClass("torrents");
     response.close();
     Elements rawShowObjects = results.select("tr");
     IPTToTvShowEpisode makeShows = new IPTToTvShowEpisode();
     List<TvShowEpisode> theShows = makeShows.makeTSEBeans(rawShowObjects);
     DBActions.insertIPTTvEpisodes(theShows, "https://www.iptorrents.com/t?5");
   } catch (MalformedURLException MURLe) {
     MURLe.printStackTrace();
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
 /**
  * 执行请求,返回字节
  *
  * @param charset
  * @param httpUriRequest
  * @return
  */
 public byte[] execute_byte(HttpUriRequest httpUriRequest, Map<String, String> header) {
   byte[] data = null;
   HttpEntity entity = null;
   try {
     CloseableHttpResponse httpResponse = httpclient.execute(httpUriRequest);
     int statusCode = httpResponse.getStatusLine().getStatusCode();
     entity = httpResponse.getEntity();
     Header heade = entity.getContentType();
     if (heade != null) {
       log.info("statusCode : " + statusCode + " ContentType : " + heade.getValue());
       setContent_type(heade.getValue());
     } else {
       log.info("statusCode : " + statusCode + " ContentType : unknown .");
     }
     setStatusCode(statusCode);
     if (statusCode == 200) {
       data = EntityUtils.toByteArray(entity);
     } else if (statusCode == 302 || statusCode == 300 || statusCode == 301) {
       URL referer = httpUriRequest.getURI().toURL();
       httpUriRequest.abort();
       Header location = httpResponse.getFirstHeader("Location");
       String locationurl = location.getValue();
       if (!locationurl.startsWith("http")) {
         URL u = new URL(referer, locationurl);
         locationurl = u.toExternalForm();
       }
       data = GetImg(locationurl, header);
     } else {
       data = EntityUtils.toByteArray(entity);
     }
   } catch (ClientProtocolException e) {
     e.printStackTrace();
   } catch (IOException e) {
     e.printStackTrace();
   } finally {
     if (httpUriRequest != null) {
       httpUriRequest.abort();
     }
     if (entity != null) {
       EntityUtils.consumeQuietly(entity);
     }
   }
   return data;
 }
 /**
  * 执行请求,返回文字
  *
  * @param charset
  * @param httpUriRequest
  * @return
  */
 public String execute_text(
     String charset, Map<String, String> header, HttpUriRequest httpUriRequest) {
   String text = "";
   try {
     CloseableHttpResponse httpResponse = httpclient.execute(httpUriRequest);
     int statusCode = httpResponse.getStatusLine().getStatusCode();
     Header heade = httpResponse.getEntity().getContentType();
     if (heade != null) {
       setContent_type(heade.getValue());
       log.info("statusCode : " + statusCode + " ContentType : " + heade.getValue());
     } else {
       log.info("statusCode : " + statusCode + " ContentType : unknown .");
     }
     setStatusCode(statusCode);
     if (statusCode == 200) {
       text = getContent(charset, httpResponse);
     } else if (statusCode == 302 || statusCode == 300 || statusCode == 301) {
       URL referer = httpUriRequest.getURI().toURL();
       httpUriRequest.abort();
       Header location = httpResponse.getFirstHeader("Location");
       String locationurl = location.getValue();
       if (!locationurl.startsWith("http")) {
         URL u = new URL(referer, locationurl);
         locationurl = u.toExternalForm();
       }
       text = Get(locationurl, header, charset);
     } else {
       text = getContent(charset, httpResponse);
     }
   } catch (ClientProtocolException e) {
     log.error(e.getMessage());
   } catch (IOException e) {
     log.error(e.getMessage());
   } catch (Exception e) {
     log.error(e.getMessage());
   } finally {
     if (httpUriRequest != null) {
       httpUriRequest.abort();
     }
   }
   return text;
 }
示例#12
0
 public void getKATorrents() {
   CloseableHttpClient httpClient = HttpClientBuilder.create().build();
   CloseableHttpResponse response = null;
   try {
     HttpGet httpGet = new HttpGet("https://kat.cr/tv/?field=time_add&sorder=desc");
     httpGet.addHeader(
         "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
     httpGet.addHeader("accept-language", "en-US,en;q=0.8");
     httpGet.addHeader("dnt", "1");
     httpGet.addHeader("upgrade-insecure-requests", "1");
     httpGet.addHeader(
         "user-agent",
         "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");
     response = httpClient.execute(httpGet);
     Header contentType = response.getFirstHeader("Content-Type");
     HttpEntity httpEntity = response.getEntity();
     String[] contentArray = contentType.getValue().split(";");
     String charset = "UTF-8";
     if (contentArray.length > 1 && contentArray[1].contains("=")) {
       charset = contentArray[1].trim().split("=")[1];
     }
     Document pageDoc = Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
     Elements oddResults = pageDoc.getElementsByClass("odd");
     Elements evenResults = pageDoc.getElementsByClass("even");
     Elements allshows = new Elements();
     for (int i = 0; i < evenResults.size(); i++) {
       allshows.add(oddResults.get(i));
       allshows.add(evenResults.get(i));
     }
     allshows.add(oddResults.last());
     response.close();
     KATToTvShowEpisode kat = new KATToTvShowEpisode();
     List<TvShowEpisode> theShows = kat.makeKATBeans(allshows);
     DBActions.insertTvEpisodes(theShows, "https://kat.cr/tv/?field=time_add&sorder=desc");
   } catch (MalformedURLException MURLe) {
     MURLe.printStackTrace();
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
示例#13
0
  @Override
  public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final boolean sendAsFlowFile = context.getProperty(SEND_AS_FLOWFILE).asBoolean();
    final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
    final String userAgent = context.getProperty(USER_AGENT).getValue();

    final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
    requestConfigBuilder.setConnectionRequestTimeout(
        context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
    requestConfigBuilder.setConnectTimeout(
        context.getProperty(CONNECTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
    requestConfigBuilder.setRedirectsEnabled(false);
    requestConfigBuilder.setSocketTimeout(
        context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
    final RequestConfig requestConfig = requestConfigBuilder.build();

    final StreamThrottler throttler = throttlerRef.get();
    final ProcessorLog logger = getLogger();

    final Double maxBatchBytes = context.getProperty(MAX_BATCH_SIZE).asDataSize(DataUnit.B);
    String lastUrl = null;
    long bytesToSend = 0L;

    final List<FlowFile> toSend = new ArrayList<>();
    DestinationAccepts destinationAccepts = null;
    CloseableHttpClient client = null;
    final String transactionId = UUID.randomUUID().toString();

    final ObjectHolder<String> dnHolder = new ObjectHolder<>("none");
    while (true) {
      FlowFile flowFile = session.get();
      if (flowFile == null) {
        break;
      }

      final String url = context.getProperty(URL).evaluateAttributeExpressions(flowFile).getValue();
      try {
        new java.net.URL(url);
      } catch (final MalformedURLException e) {
        logger.error(
            "After substituting attribute values for {}, URL is {}; this is not a valid URL, so routing to failure",
            new Object[] {flowFile, url});
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        continue;
      }

      // If this FlowFile doesn't have the same url, throw it back on the queue and stop grabbing
      // FlowFiles
      if (lastUrl != null && !lastUrl.equals(url)) {
        session.transfer(flowFile);
        break;
      }

      lastUrl = url;
      toSend.add(flowFile);

      if (client == null || destinationAccepts == null) {
        final Config config = getConfig(url, context);
        final HttpClientConnectionManager conMan = config.getConnectionManager();

        final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
        clientBuilder.setConnectionManager(conMan);
        clientBuilder.setUserAgent(userAgent);
        clientBuilder.addInterceptorFirst(
            new HttpResponseInterceptor() {
              @Override
              public void process(final HttpResponse response, final HttpContext httpContext)
                  throws HttpException, IOException {
                HttpCoreContext coreContext = HttpCoreContext.adapt(httpContext);
                ManagedHttpClientConnection conn =
                    coreContext.getConnection(ManagedHttpClientConnection.class);
                if (!conn.isOpen()) {
                  return;
                }

                SSLSession sslSession = conn.getSSLSession();

                if (sslSession != null) {
                  final X509Certificate[] certChain = sslSession.getPeerCertificateChain();
                  if (certChain == null || certChain.length == 0) {
                    throw new SSLPeerUnverifiedException("No certificates found");
                  }

                  final X509Certificate cert = certChain[0];
                  dnHolder.set(cert.getSubjectDN().getName().trim());
                }
              }
            });

        clientBuilder.disableAutomaticRetries();
        clientBuilder.disableContentCompression();

        final String username = context.getProperty(USERNAME).getValue();
        final String password = context.getProperty(PASSWORD).getValue();
        // set the credentials if appropriate
        if (username != null) {
          final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
          if (password == null) {
            credentialsProvider.setCredentials(
                AuthScope.ANY, new UsernamePasswordCredentials(username));
          } else {
            credentialsProvider.setCredentials(
                AuthScope.ANY, new UsernamePasswordCredentials(username, password));
          }
          clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
        }
        client = clientBuilder.build();

        // determine whether or not destination accepts flowfile/gzip
        destinationAccepts = config.getDestinationAccepts();
        if (destinationAccepts == null) {
          try {
            if (sendAsFlowFile) {
              destinationAccepts =
                  getDestinationAcceptance(client, url, getLogger(), transactionId);
            } else {
              destinationAccepts = new DestinationAccepts(false, false, false, false, null);
            }

            config.setDestinationAccepts(destinationAccepts);
          } catch (IOException e) {
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
            logger.error(
                "Unable to communicate with destination {} to determine whether or not it can accept "
                    + "flowfiles/gzip; routing {} to failure due to {}",
                new Object[] {url, flowFile, e});
            context.yield();
            return;
          }
        }
      }

      // if we are not sending as flowfile, or if the destination doesn't accept V3 or V2
      // (streaming) format,
      // then only use a single FlowFile
      if (!sendAsFlowFile
          || (!destinationAccepts.isFlowFileV3Accepted()
              && !destinationAccepts.isFlowFileV2Accepted())) {
        break;
      }

      bytesToSend += flowFile.getSize();
      if (bytesToSend > maxBatchBytes.longValue()) {
        break;
      }
    }

    if (toSend.isEmpty()) {
      return;
    }

    final String url = lastUrl;
    final HttpPost post = new HttpPost(url);
    final List<FlowFile> flowFileList = toSend;
    final DestinationAccepts accepts = destinationAccepts;
    final boolean isDestinationLegacyNiFi = accepts.getProtocolVersion() == null;

    final EntityTemplate entity =
        new EntityTemplate(
            new ContentProducer() {
              @Override
              public void writeTo(final OutputStream rawOut) throws IOException {
                final OutputStream throttled =
                    (throttler == null) ? rawOut : throttler.newThrottledOutputStream(rawOut);
                OutputStream wrappedOut = new BufferedOutputStream(throttled);
                if (compressionLevel > 0 && accepts.isGzipAccepted()) {
                  wrappedOut = new GZIPOutputStream(wrappedOut, compressionLevel);
                }

                try (final OutputStream out = wrappedOut) {
                  for (final FlowFile flowFile : flowFileList) {
                    session.read(
                        flowFile,
                        new InputStreamCallback() {
                          @Override
                          public void process(final InputStream rawIn) throws IOException {
                            try (final InputStream in = new BufferedInputStream(rawIn)) {

                              FlowFilePackager packager = null;
                              if (!sendAsFlowFile) {
                                packager = null;
                              } else if (accepts.isFlowFileV3Accepted()) {
                                packager = new FlowFilePackagerV3();
                              } else if (accepts.isFlowFileV2Accepted()) {
                                packager = new FlowFilePackagerV2();
                              } else if (accepts.isFlowFileV1Accepted()) {
                                packager = new FlowFilePackagerV1();
                              }

                              // if none of the above conditions is met, we should never get here,
                              // because
                              // we will have already verified that at least 1 of the FlowFile
                              // packaging
                              // formats is acceptable if sending as FlowFile.
                              if (packager == null) {
                                StreamUtils.copy(in, out);
                              } else {
                                final Map<String, String> flowFileAttributes;
                                if (isDestinationLegacyNiFi) {
                                  // Old versions of NiFi expect nf.file.name and nf.file.path to
                                  // indicate filename & path;
                                  // in order to maintain backward compatibility, we copy the
                                  // filename & path to those attribute keys.
                                  flowFileAttributes = new HashMap<>(flowFile.getAttributes());
                                  flowFileAttributes.put(
                                      "nf.file.name",
                                      flowFile.getAttribute(CoreAttributes.FILENAME.key()));
                                  flowFileAttributes.put(
                                      "nf.file.path",
                                      flowFile.getAttribute(CoreAttributes.PATH.key()));
                                } else {
                                  flowFileAttributes = flowFile.getAttributes();
                                }

                                packager.packageFlowFile(
                                    in, out, flowFileAttributes, flowFile.getSize());
                              }
                            }
                          }
                        });
                  }

                  out.flush();
                }
              }
            });

    entity.setChunked(context.getProperty(CHUNKED_ENCODING).asBoolean());
    post.setEntity(entity);
    post.setConfig(requestConfig);

    final String contentType;
    if (sendAsFlowFile) {
      if (accepts.isFlowFileV3Accepted()) {
        contentType = APPLICATION_FLOW_FILE_V3;
      } else if (accepts.isFlowFileV2Accepted()) {
        contentType = APPLICATION_FLOW_FILE_V2;
      } else if (accepts.isFlowFileV1Accepted()) {
        contentType = APPLICATION_FLOW_FILE_V1;
      } else {
        logger.error(
            "Cannot send data to {} because the destination does not accept FlowFiles and this processor is "
                + "configured to deliver FlowFiles; rolling back session",
            new Object[] {url});
        session.rollback();
        context.yield();
        return;
      }
    } else {
      final String attributeValue = toSend.get(0).getAttribute(CoreAttributes.MIME_TYPE.key());
      contentType = (attributeValue == null) ? DEFAULT_CONTENT_TYPE : attributeValue;
    }

    final String attributeHeaderRegex = context.getProperty(ATTRIBUTES_AS_HEADERS_REGEX).getValue();
    if (attributeHeaderRegex != null && !sendAsFlowFile && flowFileList.size() == 1) {
      final Pattern pattern = Pattern.compile(attributeHeaderRegex);

      final Map<String, String> attributes = flowFileList.get(0).getAttributes();
      for (final Map.Entry<String, String> entry : attributes.entrySet()) {
        final String key = entry.getKey();
        if (pattern.matcher(key).matches()) {
          post.setHeader(entry.getKey(), entry.getValue());
        }
      }
    }

    post.setHeader(CONTENT_TYPE, contentType);
    post.setHeader(FLOWFILE_CONFIRMATION_HEADER, "true");
    post.setHeader(PROTOCOL_VERSION_HEADER, PROTOCOL_VERSION);
    post.setHeader(TRANSACTION_ID_HEADER, transactionId);
    if (compressionLevel > 0 && accepts.isGzipAccepted()) {
      post.setHeader(GZIPPED_HEADER, "true");
    }

    // Do the actual POST
    final String flowFileDescription =
        toSend.size() <= 10 ? toSend.toString() : toSend.size() + " FlowFiles";

    final String uploadDataRate;
    final long uploadMillis;
    CloseableHttpResponse response = null;
    try {
      final StopWatch stopWatch = new StopWatch(true);
      response = client.execute(post);

      // consume input stream entirely, ignoring its contents. If we
      // don't do this, the Connection will not be returned to the pool
      EntityUtils.consume(response.getEntity());
      stopWatch.stop();
      uploadDataRate = stopWatch.calculateDataRate(bytesToSend);
      uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
    } catch (final IOException e) {
      logger.error(
          "Failed to Post {} due to {}; transferring to failure",
          new Object[] {flowFileDescription, e});
      context.yield();
      for (FlowFile flowFile : toSend) {
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
      }
      return;
    } finally {
      if (response != null) {
        try {
          response.close();
        } catch (IOException e) {
          getLogger().warn("Failed to close HTTP Response due to {}", new Object[] {e});
        }
      }
    }

    // If we get a 'SEE OTHER' status code and an HTTP header that indicates that the intent
    // of the Location URI is a flowfile hold, we will store this holdUri. This prevents us
    // from posting to some other webservice and then attempting to delete some resource to which
    // we are redirected
    final int responseCode = response.getStatusLine().getStatusCode();
    final String responseReason = response.getStatusLine().getReasonPhrase();
    String holdUri = null;
    if (responseCode == HttpServletResponse.SC_SEE_OTHER) {
      final Header locationUriHeader = response.getFirstHeader(LOCATION_URI_INTENT_NAME);
      if (locationUriHeader != null) {
        if (LOCATION_URI_INTENT_VALUE.equals(locationUriHeader.getValue())) {
          final Header holdUriHeader = response.getFirstHeader(LOCATION_HEADER_NAME);
          if (holdUriHeader != null) {
            holdUri = holdUriHeader.getValue();
          }
        }
      }

      if (holdUri == null) {
        for (FlowFile flowFile : toSend) {
          flowFile = session.penalize(flowFile);
          logger.error(
              "Failed to Post {} to {}: sent content and received status code {}:{} but no Hold URI",
              new Object[] {flowFile, url, responseCode, responseReason});
          session.transfer(flowFile, REL_FAILURE);
        }
        return;
      }
    }

    if (holdUri == null) {
      if (responseCode == HttpServletResponse.SC_SERVICE_UNAVAILABLE) {
        for (FlowFile flowFile : toSend) {
          flowFile = session.penalize(flowFile);
          logger.error(
              "Failed to Post {} to {}: response code was {}:{}; will yield processing, "
                  + "since the destination is temporarily unavailable",
              new Object[] {flowFile, url, responseCode, responseReason});
          session.transfer(flowFile, REL_FAILURE);
        }
        context.yield();
        return;
      }

      if (responseCode >= 300) {
        for (FlowFile flowFile : toSend) {
          flowFile = session.penalize(flowFile);
          logger.error(
              "Failed to Post {} to {}: response code was {}:{}",
              new Object[] {flowFile, url, responseCode, responseReason});
          session.transfer(flowFile, REL_FAILURE);
        }
        return;
      }

      logger.info(
          "Successfully Posted {} to {} in {} at a rate of {}",
          new Object[] {
            flowFileDescription,
            url,
            FormatUtils.formatMinutesSeconds(uploadMillis, TimeUnit.MILLISECONDS),
            uploadDataRate
          });

      for (final FlowFile flowFile : toSend) {
        session
            .getProvenanceReporter()
            .send(flowFile, url, "Remote DN=" + dnHolder.get(), uploadMillis, true);
        session.transfer(flowFile, REL_SUCCESS);
      }
      return;
    }

    //
    // the response indicated a Hold URI; delete the Hold.
    //
    // determine the full URI of the Flow File's Hold; Unfortunately, the responses that are
    // returned have
    // changed over the past, so we have to take into account a few different possibilities.
    String fullHoldUri = holdUri;
    if (holdUri.startsWith("/contentListener")) {
      // If the Hold URI that we get starts with /contentListener, it may not really be
      // /contentListener,
      // as this really indicates that it should be whatever we posted to -- if posting directly to
      // the
      // ListenHTTP component, it will be /contentListener, but if posting to a proxy/load balancer,
      // we may
      // be posting to some other URL.
      fullHoldUri = url + holdUri.substring(16);
    } else if (holdUri.startsWith("/")) {
      // URL indicates the full path but not hostname or port; use the same hostname & port that we
      // posted
      // to but use the full path indicated by the response.
      int firstSlash = url.indexOf("/", 8);
      if (firstSlash < 0) {
        firstSlash = url.length();
      }
      final String beforeSlash = url.substring(0, firstSlash);
      fullHoldUri = beforeSlash + holdUri;
    } else if (!holdUri.startsWith("http")) {
      // Absolute URL
      fullHoldUri = url + (url.endsWith("/") ? "" : "/") + holdUri;
    }

    final HttpDelete delete = new HttpDelete(fullHoldUri);
    delete.setHeader(TRANSACTION_ID_HEADER, transactionId);

    while (true) {
      try {
        final HttpResponse holdResponse = client.execute(delete);
        EntityUtils.consume(holdResponse.getEntity());
        final int holdStatusCode = holdResponse.getStatusLine().getStatusCode();
        final String holdReason = holdResponse.getStatusLine().getReasonPhrase();
        if (holdStatusCode >= 300) {
          logger.error(
              "Failed to delete Hold that destination placed on {}: got response code {}:{}; routing to failure",
              new Object[] {flowFileDescription, holdStatusCode, holdReason});

          for (FlowFile flowFile : toSend) {
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
          }
          return;
        }

        logger.info(
            "Successfully Posted {} to {} in {} milliseconds at a rate of {}",
            new Object[] {flowFileDescription, url, uploadMillis, uploadDataRate});

        for (FlowFile flowFile : toSend) {
          session.getProvenanceReporter().send(flowFile, url);
          session.transfer(flowFile, REL_SUCCESS);
        }
        return;
      } catch (final IOException e) {
        logger.warn(
            "Failed to delete Hold that destination placed on {} due to {}",
            new Object[] {flowFileDescription, e});
      }

      if (!isScheduled()) {
        context.yield();
        logger.warn(
            "Failed to delete Hold that destination placed on {}; Processor has been stopped so routing FlowFile(s) to failure",
            new Object[] {flowFileDescription});
        for (FlowFile flowFile : toSend) {
          flowFile = session.penalize(flowFile);
          session.transfer(flowFile, REL_FAILURE);
        }
        return;
      }
    }
  }
  @Override
  public boolean authenticate() {
    if (!super.authenticate()) {
      LOG.error(
          String.format(
              "blank username or password detected, no %s xword will be downloaded",
              this.getType()));
      return false;
    }

    final HttpUriRequest loginGet = RequestBuilder.get().setUri(NYT_LOGIN_URL).build();

    final String loginPage;
    try (final CloseableHttpResponse getResponse = this.getHttpClient().execute(loginGet)) {
      loginPage = EntityUtils.toString(getResponse.getEntity());
    } catch (final IOException e) {
      LOG.error("error while navigating to NYT login page", e);
      return false;
    }

    final String token;
    final String expires;

    try {
      final TagNode node = this.getCleaner().clean(loginPage);

      final Object[] foundNodes = node.evaluateXPath("//input[@name='token']");
      if (foundNodes.length != 1) {
        this.throwLoginException(
            "unexpected login page, found %d hidden token input elements, expected 1",
            foundNodes.length);
      }
      final TagNode hiddenTokenInput = (TagNode) foundNodes[0];
      token = hiddenTokenInput.getAttributeByName("value");
      LOG.debug("found hidden input token {}", token);

      final Object[] foundExpiresNodes = node.evaluateXPath("//input[@name='expires']");
      if (foundExpiresNodes.length != 1) {
        this.throwLoginException(
            "unexpected login page, found %d hidden token expiration input elements, expected 1",
            foundNodes.length);
      }
      final TagNode hiddenTokenExpiresInput = (TagNode) foundExpiresNodes[0];
      expires = hiddenTokenExpiresInput.getAttributeByName("value");
      LOG.debug("found hidden input token expiration {}", expires);
    } catch (LoginException | XPatherException e) {
      LOG.error("error while pulling login tokens from NYT login page", e);
      return false;
    }

    // @formatter:off
    final HttpUriRequest loginPost =
        RequestBuilder.post()
            .setUri("https://myaccount.nytimes.com/auth/login")
            .addParameter("is_continue", Boolean.FALSE.toString())
            .addParameter("token", token)
            .addParameter("expires", expires)
            .addParameter("userid", this.getLoginInfo().getUsername())
            .addParameter("password", this.getLoginInfo().getPassword())
            .addParameter("remember", Boolean.TRUE.toString())
            .build();
    // @formatter:on

    try (CloseableHttpResponse postResponse = this.getHttpClient().execute(loginPost)) {

      // successful NYT login should give 302 status
      final int responseStatus = postResponse.getStatusLine().getStatusCode();
      if (responseStatus != 302) {
        final String errorMessage =
            String.format("did not detect expected 302 redirect, got %d instead", responseStatus);
        throw new LoginException(errorMessage);
      }

      // successful NYT login redirects to the NYT homepage
      final Header location = postResponse.getFirstHeader("Location");
      // have seen this redirect both with and without the final portion
      final Pattern expectedRedirectLocation =
          Pattern.compile("http://www.nytimes.com(\\?login=email)*");
      final String actualRedirectLocation = location.getValue();
      final Matcher matcher = expectedRedirectLocation.matcher(actualRedirectLocation);
      if (!matcher.matches()) {
        final String errorMessage =
            String.format(
                "redirect to unexpected URL, expected %s, found Location=%s instead",
                expectedRedirectLocation, actualRedirectLocation);
        throw new LoginException(errorMessage);
      }

      // successful NYT login should set a few cookies
      final Header[] cookies = postResponse.getHeaders("Set-Cookie");
      if (cookies.length < 1) {
        throw new LoginException("no post login cookies set, login likely failed");
      }

    } catch (final IOException | LoginException e) {
      LOG.error("error while logging in, e={}", e.getMessage());
      return false;
    }

    LOG.info("successfully logged in to nyt");
    return true;
  }
  /**
   * Downloads to a directory represented by a {@link File} object, determining the file name from
   * the Content-Disposition header.
   *
   * @param url URL of file
   * @param base base directory in which the download is saved
   * @return the absolute file path of the downloaded file, or an empty string if the file could not
   *     be downloaded
   */
  public String download(String url, File base) {
    if (mHTTPClient == null) {
      mHTTPClient = createHTTPClient(mPrivateKey, mCertificate, mValidateCertificate);
      if (mHTTPClient == null) return "";
    }

    LOGGER.info("downloading file from URL=" + url + "...");
    mCurrentRequest = new HttpGet(url);

    // execute request
    CloseableHttpResponse response;
    try {
      response = mHTTPClient.execute(mCurrentRequest);
    } catch (IOException ex) {
      LOGGER.log(Level.WARNING, "can't execute request", ex);
      return "";
    }

    try {
      int code = response.getStatusLine().getStatusCode();
      // HTTP/1.1 200 OK -- other codes should throw Exceptions
      if (code != 200) {
        LOGGER.warning("invalid response code: " + code);
        return "";
      }

      // get filename
      Header dispHeader = response.getFirstHeader("Content-Disposition");
      if (dispHeader == null) {
        LOGGER.warning("no content header");
        return "";
      }
      String filename = parseContentDisposition(dispHeader.getValue());
      // never trust incoming data
      filename = filename != null ? new File(filename).getName() : "";
      if (filename.isEmpty()) {
        LOGGER.warning("no filename in content: " + dispHeader.getValue());
        return "";
      }

      // get file size
      long s = -1;
      Header lengthHeader = response.getFirstHeader("Content-Length");
      if (lengthHeader == null) {
        LOGGER.warning("no length header");
      } else {
        try {
          s = Long.parseLong(lengthHeader.getValue());
        } catch (NumberFormatException ex) {
          LOGGER.log(Level.WARNING, "can' parse file size", ex);
        }
      }
      final long fileSize = s;
      mListener.updateProgress(s < 0 ? -2 : 0);

      // TODO should check for content-disposition parsing here
      // and choose another filename if necessary
      HttpEntity entity = response.getEntity();
      if (entity == null) {
        LOGGER.warning("no entity in response");
        return "";
      }

      File destination = new File(base, filename);
      if (destination.exists()) {
        LOGGER.warning("file already exists: " + destination.getAbsolutePath());
        return "";
      }
      try (FileOutputStream out = new FileOutputStream(destination)) {
        CountingOutputStream cOut =
            new CountingOutputStream(out) {
              @Override
              protected synchronized void afterWrite(int n) {
                if (fileSize <= 0) return;

                // inform listener
                mListener.updateProgress((int) (this.getByteCount() / (fileSize * 1.0) * 100));
              }
            };
        entity.writeTo(cOut);
      } catch (IOException ex) {
        LOGGER.log(Level.WARNING, "can't download file", ex);
        return "";
      }

      LOGGER.info("... download successful!");
      return destination.getAbsolutePath();
    } finally {
      try {
        response.close();
      } catch (IOException ex) {
        LOGGER.log(Level.WARNING, "can't close response", ex);
      }
    }
  }
示例#16
0
 private static void assert404NotFound(CloseableHttpResponse res) {
   assertStatusLine(res, "HTTP/1.1 404 Not Found");
   // Ensure that the 'Last-Modified' header does not exist.
   assertThat(res.getFirstHeader(HttpHeaders.LAST_MODIFIED), is(nullValue()));
 }