/**
 * Stubs the shared mocks so that any request executed through {@code mockHttpclient} yields
 * {@code mockResponse} with the given status code, {@code Content-Type} header and entity.
 *
 * <p>The response is stubbed with no {@code Location} header and an empty set of {@code Link}
 * headers.
 *
 * @param contentType value of the stubbed {@code Content-Type} header
 * @param entity entity returned by {@code mockResponse.getEntity()}
 * @param status status code returned by {@code mockStatus.getStatusCode()}
 * @throws IOException declared by {@code execute}, which is invoked while stubbing
 */
private void doSetupMockRequest(
    final String contentType, final ByteArrayEntity entity, final int status)
    throws IOException {
  // Status line.
  when(mockStatus.getStatusCode()).thenReturn(status);
  when(mockResponse.getStatusLine()).thenReturn(mockStatus);

  // Headers.
  when(mockResponse.getFirstHeader("Content-Type"))
      .thenReturn(new BasicHeader("Content-Type", contentType));
  when(mockResponse.getFirstHeader("Location")).thenReturn(null);
  when(mockResponse.getHeaders("Link")).thenReturn(new Header[0]);

  // Body.
  when(mockResponse.getEntity()).thenReturn(entity);

  // Finally, route every request to the stubbed response.
  when(mockHttpclient.execute(any(HttpUriRequest.class))).thenReturn(mockResponse);
}
private static void assert304NotModified( CloseableHttpResponse res, String expectedLastModified, String expectedContentType) { assertStatusLine(res, "HTTP/1.1 304 Not Modified"); // Ensure that the 'Last-Modified' header did not change. assertThat(res.getFirstHeader(HttpHeaders.LAST_MODIFIED).getValue(), is(expectedLastModified)); // Ensure that the content does not exist but its type does. assertThat(res.getEntity(), is(nullValue())); assertThat(res.containsHeader(HttpHeaders.CONTENT_TYPE), is(true)); assertThat( res.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue(), startsWith(expectedContentType)); }
/**
 * Issues a GET for {@code source.url} (with {@code source.parameters} appended as query
 * parameters) and converts the response body according to its {@code Content-Type}.
 *
 * <ul>
 *   <li>{@code text/html}: parsed with jsoup and returned as a document;
 *   <li>anything containing {@code json}: deserialized into a {@link HashMap};
 *   <li>otherwise (including a missing {@code Content-Type}): the body is copied to {@code
 *       System.err} and {@code null} is returned.
 * </ul>
 *
 * @param source URL and query parameters of the document to fetch
 * @return the parsed document or map, or {@code null} when the content type is not handled or
 *     the response has no body
 * @throws RuntimeException wrapping any failure while building the URI, executing the request or
 *     parsing the body
 */
public static Object fetch(DocumentSource source) {
  try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
    URIBuilder builder = new URIBuilder(source.url);
    for (Entry<String, String> it : source.parameters.entrySet()) {
      builder.addParameter(it.getKey(), it.getValue());
    }
    HttpUriRequest request = new HttpGet(builder.build());
    request.addHeader("Accept", "application/json");

    // The response holds a pooled connection; it must be closed even when parsing fails.
    try (CloseableHttpResponse response = httpclient.execute(request)) {
      // A response without Content-Type is treated as "unknown" rather than crashing.
      String contentType =
          response.containsHeader("Content-Type")
              ? response.getFirstHeader("Content-Type").getValue()
              : null;
      if (contentType == null) {
        contentType = "";
      }
      if (response.getEntity() == null) {
        return null;
      }

      // Both parsers below read the stream fully before returning, so closing the response
      // afterwards does not truncate the result.
      InputStream inputStream = response.getEntity().getContent();
      if (contentType.contains("text/html")) {
        return Jsoup.parse(inputStream, null, source.url);
      }
      if (contentType.contains("json")) {
        return new ObjectMapper().readValue(inputStream, HashMap.class);
      }
      IOUtils.copy(inputStream, System.err);
      return null;
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
public static void main(String[] args) throws ClientProtocolException, IOException { try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { // HttpClient httpClient = getTestHttpClient(); // HttpHost proxy = new HttpHost("127.0.0.1", 8888); // httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, // proxy); String apiKey = args[0]; String latitude = "46.947922"; String longitude = "7.444608"; String urlTemplate = "https://api.forecast.io/forecast/%s/%s,%s"; String url = String.format(urlTemplate, apiKey, latitude, longitude); System.out.println(url); HttpGet httpget = new HttpGet(url); try (CloseableHttpResponse response = httpClient.execute(httpget)) { System.out.println(response.getFirstHeader("Content-Encoding")); HeaderIterator it = response.headerIterator(); while (it.hasNext()) { Object header = it.next(); System.out.println(header); } HttpEntity entity = response.getEntity(); String jsonData = EntityUtils.toString(entity, StandardCharsets.UTF_8); System.out.println(jsonData); } } }
public InputStream getInputStream() throws Exception { httpClient = HttpClients.createDefault(); HttpGet httpGet = new HttpGet(uri); System.out.println(uri); RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(30000).setConnectTimeout(30000).build(); httpGet.setConfig(requestConfig); // DefaultHttpRequestRetryHandler handler = new DefaultHttpRequestRetryHandler(3, false); // httpClient.setHttpRequestRetryHandler(handler); response = httpClient.execute(httpGet); System.out.println(response.getStatusLine()); if (HttpStatus.SC_OK != response.getStatusLine().getStatusCode()) { // System.out.println("Connect Error"); return null; } if (key != null) { Header header = response.getFirstHeader(key); if (header == null) { // not valid date // System.out.println("Not valid date."); return null; } } entity = response.getEntity(); return entity.getContent(); }
public void getTPBShows() { CloseableHttpClient httpClient = HttpClientBuilder.create().build(); CloseableHttpResponse response = null; try { HttpGet httpGet = new HttpGet(pageURL); httpGet.addHeader( "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"); response = httpClient.execute(httpGet); HttpEntity httpEntity = response.getEntity(); Header contentType = response.getFirstHeader("Content-Type"); String[] contentArray = contentType.getValue().split(";"); String charset = "UTF-8"; // String mimeType = contentArray[0].trim(); if (contentArray.length > 1 && contentArray[1].contains("=")) { charset = contentArray[1].trim().split("=")[1]; } Document pageDoc = Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath()); Element results = pageDoc.getElementById("searchResult"); response.close(); Elements rawShowObjects = results.select("td.vertTh+td"); TPBToTvShowEpisode makeShows = new TPBToTvShowEpisode(); List<TvShowEpisode> theShows = makeShows.makeTSEBeans(rawShowObjects); DBActions.insertTvEpisodes(theShows, pageURL); } catch (MalformedURLException MURLe) { // Utilities.sendExceptionEmail(MURLe.getMessage()); MURLe.printStackTrace(); } catch (Exception e) { // Utilities.sendExceptionEmail(e.getMessage()); e.printStackTrace(); } }
/**
 * Verifies that {@code /default/favicon.ico} is answered with {@code 200 OK}, an {@code
 * image/x-icon} content type and a non-empty body.
 */
@Test
public void testDefaultHandlerFavicon() throws Exception {
  try (CloseableHttpClient client = HttpClients.createMinimal();
      CloseableHttpResponse response =
          client.execute(new HttpGet(uri("/default/favicon.ico")))) {
    final String statusLine = response.getStatusLine().toString();
    assertThat(statusLine, is("HTTP/1.1 200 OK"));

    final String contentType =
        response.getFirstHeader(HttpHeaderNames.CONTENT_TYPE.toString()).getValue();
    assertThat(contentType, startsWith("image/x-icon"));

    final int bodyLength = EntityUtils.toByteArray(response.getEntity()).length;
    assertThat(bodyLength, is(greaterThan(0)));
  }
}
private static String assert200Ok( CloseableHttpResponse res, String expectedContentType, String expectedContent) throws Exception { assertStatusLine(res, "HTTP/1.1 200 OK"); // Ensure that the 'Last-Modified' header exists and is well-formed. final String lastModified; assertThat(res.containsHeader(HttpHeaders.LAST_MODIFIED), is(true)); lastModified = res.getFirstHeader(HttpHeaders.LAST_MODIFIED).getValue(); HttpHeaderDateFormat.get().parse(lastModified); // Ensure the content and its type are correct. assertThat(EntityUtils.toString(res.getEntity()), is(expectedContent)); assertThat(res.containsHeader(HttpHeaders.CONTENT_TYPE), is(true)); assertThat( res.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue(), startsWith(expectedContentType)); return lastModified; }
/**
 * Logs in to iptorrents.com, downloads the listing page {@code /t?5}, converts every {@code tr}
 * inside elements of class {@code torrents} to {@code TvShowEpisode} beans and stores them via
 * {@code DBActions.insertIPTTvEpisodes}.
 *
 * <p>Three requests are made with one client: a GET of the landing page, a login POST that
 * replays the landing page's response headers, and a GET of the listing page that replays the
 * login response's {@code set-cookie} headers. Failures are reported on standard error and
 * otherwise ignored.
 */
public void getIPTShows() {
  String pageURL = "https://www.iptorrents.com";
  String listingURL = "https://www.iptorrents.com/t?5";

  // try-with-resources closes the client and each response on every path, including failures.
  try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
    // 1. Landing page: its response headers (minus Transfer-Encoding) are replayed on the login.
    HttpGet httpGet = new HttpGet(pageURL);
    httpGet.addHeader(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36");

    // SECURITY(review): credentials are hard-coded and sent in the query string. They should be
    // moved to configuration (and rotated); left unchanged here to keep behavior identical.
    HttpPost thePost = new HttpPost(pageURL + "?username=mcpchelper81&password=ru68ce48&php=");
    try (CloseableHttpResponse landingResponse = httpClient.execute(httpGet)) {
      landingResponse.removeHeaders("Transfer-Encoding");
      thePost.setHeaders(landingResponse.getAllHeaders());
    }

    // 2. Login: the cookies it sets are copied onto the listing request.
    httpGet = new HttpGet(listingURL);
    try (CloseableHttpResponse loginResponse = httpClient.execute(thePost)) {
      httpGet.setHeaders(loginResponse.getHeaders("set-cookie"));
    }
    httpGet.addHeader(
        "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
    httpGet.addHeader("accept-language", "en-US,en;q=0.8");
    httpGet.addHeader("dnt", "1");
    httpGet.addHeader("upgrade-insecure-requests", "1");
    httpGet.addHeader(
        "user-agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");

    // 3. Listing page.
    Elements rawShowObjects;
    try (CloseableHttpResponse listingResponse = httpClient.execute(httpGet)) {
      HttpEntity httpEntity = listingResponse.getEntity();
      if (httpEntity == null) {
        System.err.println("No response body received from " + listingURL);
        return;
      }

      // Take the charset from the second Content-Type element ("text/html; charset=xyz") when
      // it is present and well formed; otherwise keep the UTF-8 default.
      String charset = "UTF-8";
      Header contentType = listingResponse.getFirstHeader("Content-Type");
      if (contentType != null && contentType.getValue() != null) {
        String[] contentArray = contentType.getValue().split(";");
        if (contentArray.length > 1) {
          String[] parameter = contentArray[1].trim().split("=");
          if (parameter.length > 1) {
            charset = parameter[1];
          }
        }
      }

      // jsoup reads the stream completely, so the response can be closed right after parsing.
      Document pageDoc =
          Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
      Elements results = pageDoc.getElementsByClass("torrents");
      rawShowObjects = results.select("tr");
    }

    IPTToTvShowEpisode makeShows = new IPTToTvShowEpisode();
    List<TvShowEpisode> theShows = makeShows.makeTSEBeans(rawShowObjects);
    DBActions.insertIPTTvEpisodes(theShows, "https://www.iptorrents.com/t?5");
  } catch (Exception e) {
    // Best-effort job: report and carry on (MalformedURLException was handled identically).
    e.printStackTrace();
  }
}
/**
 * Executes the request and returns the response body as bytes.
 *
 * <p>The status code and, when available, the entity's content type are recorded through {@code
 * setStatusCode} and {@code setContent_type}. For status 300, 301 and 302 the {@code Location}
 * header is resolved against the request URI when it does not start with {@code http}, and the
 * target is fetched with {@code GetImg}. For every other status the body is returned as is.
 *
 * @param httpUriRequest request to execute; it is always aborted before this method returns
 * @param header headers passed on to {@code GetImg} when a redirect is followed
 * @return the body bytes, or {@code null} when the request fails, the response has no body, or a
 *     redirect carries no {@code Location} header
 */
public byte[] execute_byte(HttpUriRequest httpUriRequest, Map<String, String> header) {
  byte[] data = null;
  HttpEntity entity = null;
  CloseableHttpResponse httpResponse = null;
  try {
    httpResponse = httpclient.execute(httpUriRequest);
    int statusCode = httpResponse.getStatusLine().getStatusCode();
    entity = httpResponse.getEntity();

    // Responses without a body have no entity to ask for a content type.
    Header heade = (entity == null) ? null : entity.getContentType();
    if (heade != null) {
      log.info("statusCode : " + statusCode + " ContentType : " + heade.getValue());
      setContent_type(heade.getValue());
    } else {
      log.info("statusCode : " + statusCode + " ContentType : unknown .");
    }
    setStatusCode(statusCode);

    if (statusCode == 302 || statusCode == 300 || statusCode == 301) {
      URL referer = httpUriRequest.getURI().toURL();
      httpUriRequest.abort();
      Header location = httpResponse.getFirstHeader("Location");
      if (location == null || location.getValue() == null) {
        log.error("statusCode : " + statusCode + " without Location header, cannot follow");
      } else {
        String locationurl = location.getValue();
        if (!locationurl.startsWith("http")) {
          // Relative redirect: resolve it against the URL that was requested.
          locationurl = new URL(referer, locationurl).toExternalForm();
        }
        data = GetImg(locationurl, header);
      }
    } else if (entity != null) {
      data = EntityUtils.toByteArray(entity);
    }
  } catch (IOException e) {
    // Also covers ClientProtocolException, which is an IOException.
    log.error(e.getMessage(), e);
  } finally {
    if (httpUriRequest != null) {
      httpUriRequest.abort();
    }
    if (entity != null) {
      EntityUtils.consumeQuietly(entity);
    }
    if (httpResponse != null) {
      try {
        httpResponse.close();
      } catch (IOException e) {
        log.error(e.getMessage(), e);
      }
    }
  }
  return data;
}
/**
 * Executes the request and returns the response body as text.
 *
 * <p>The status code and, when available, the entity's content type are recorded through {@code
 * setStatusCode} and {@code setContent_type}. For status 300, 301 and 302 the {@code Location}
 * header is resolved against the request URI when it does not start with {@code http}, and the
 * target is fetched with {@code Get}. For every other status the body is decoded by {@code
 * getContent}.
 *
 * @param charset charset passed to {@code getContent} and to {@code Get}
 * @param header headers passed on to {@code Get} when a redirect is followed
 * @param httpUriRequest request to execute; it is always aborted before this method returns
 * @return the body text, or an empty string when the request fails, the response has no body, or
 *     a redirect carries no {@code Location} header
 */
public String execute_text(
    String charset, Map<String, String> header, HttpUriRequest httpUriRequest) {
  String text = "";
  CloseableHttpResponse httpResponse = null;
  try {
    httpResponse = httpclient.execute(httpUriRequest);
    int statusCode = httpResponse.getStatusLine().getStatusCode();

    // Responses without a body have no entity to ask for a content type.
    boolean hasEntity = httpResponse.getEntity() != null;
    Header heade = hasEntity ? httpResponse.getEntity().getContentType() : null;
    if (heade != null) {
      setContent_type(heade.getValue());
      log.info("statusCode : " + statusCode + " ContentType : " + heade.getValue());
    } else {
      log.info("statusCode : " + statusCode + " ContentType : unknown .");
    }
    setStatusCode(statusCode);

    if (statusCode == 302 || statusCode == 300 || statusCode == 301) {
      URL referer = httpUriRequest.getURI().toURL();
      httpUriRequest.abort();
      Header location = httpResponse.getFirstHeader("Location");
      if (location == null || location.getValue() == null) {
        log.error("statusCode : " + statusCode + " without Location header, cannot follow");
      } else {
        String locationurl = location.getValue();
        if (!locationurl.startsWith("http")) {
          // Relative redirect: resolve it against the URL that was requested.
          locationurl = new URL(referer, locationurl).toExternalForm();
        }
        text = Get(locationurl, header, charset);
      }
    } else if (hasEntity) {
      // NOTE(review): assumes getContent reads the body completely before returning, since the
      // response is closed in the finally block — confirm.
      text = getContent(charset, httpResponse);
    }
  } catch (Exception e) {
    // The original handled ClientProtocolException, IOException and Exception identically.
    log.error(e.getMessage(), e);
  } finally {
    if (httpUriRequest != null) {
      httpUriRequest.abort();
    }
    if (httpResponse != null) {
      try {
        httpResponse.close();
      } catch (IOException e) {
        log.error(e.getMessage(), e);
      }
    }
  }
  return text;
}
/**
 * Downloads the kat.cr TV listing, collects the elements of class {@code odd} and {@code even}
 * in alternating order (odd first), converts them to {@code TvShowEpisode} beans and stores them
 * via {@code DBActions.insertTvEpisodes}.
 *
 * <p>The page is decoded with the charset announced in the {@code Content-Type} header, falling
 * back to UTF-8. Failures are reported on standard error and otherwise ignored.
 */
public void getKATorrents() {
  String katURL = "https://kat.cr/tv/?field=time_add&sorder=desc";

  // try-with-resources closes the client and the response on every path, including failures.
  try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
    HttpGet httpGet = new HttpGet(katURL);
    httpGet.addHeader(
        "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
    httpGet.addHeader("accept-language", "en-US,en;q=0.8");
    httpGet.addHeader("dnt", "1");
    httpGet.addHeader("upgrade-insecure-requests", "1");
    httpGet.addHeader(
        "user-agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");

    Elements allshows = new Elements();
    try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
      HttpEntity httpEntity = response.getEntity();
      if (httpEntity == null) {
        System.err.println("No response body received from " + katURL);
        return;
      }

      // Take the charset from the second Content-Type element ("text/html; charset=xyz") when
      // it is present and well formed; otherwise keep the UTF-8 default.
      String charset = "UTF-8";
      Header contentType = response.getFirstHeader("Content-Type");
      if (contentType != null && contentType.getValue() != null) {
        String[] contentArray = contentType.getValue().split(";");
        if (contentArray.length > 1) {
          String[] parameter = contentArray[1].trim().split("=");
          if (parameter.length > 1) {
            charset = parameter[1];
          }
        }
      }

      // jsoup reads the stream completely, so the response can be closed right after parsing.
      Document pageDoc =
          Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
      Elements oddResults = pageDoc.getElementsByClass("odd");
      Elements evenResults = pageDoc.getElementsByClass("even");

      // Interleave odd/even rows without assuming there is exactly one more odd row than even
      // rows: the previous loop failed on fewer odd rows and duplicated the last odd row (or
      // added null) when the counts were equal.
      int rows = Math.max(oddResults.size(), evenResults.size());
      for (int i = 0; i < rows; i++) {
        if (i < oddResults.size()) {
          allshows.add(oddResults.get(i));
        }
        if (i < evenResults.size()) {
          allshows.add(evenResults.get(i));
        }
      }
    }

    KATToTvShowEpisode kat = new KATToTvShowEpisode();
    List<TvShowEpisode> theShows = kat.makeKATBeans(allshows);
    DBActions.insertTvEpisodes(theShows, "https://kat.cr/tv/?field=time_add&sorder=desc");
  } catch (Exception e) {
    // Best-effort job: report and carry on (MalformedURLException was handled identically).
    e.printStackTrace();
  }
}
/**
 * Collects FlowFiles from the session that share one destination URL, POSTs them in a single
 * request and routes them to {@code REL_SUCCESS} or {@code REL_FAILURE} based on the response.
 *
 * <p>Outline of what the code below does:
 *
 * <ol>
 *   <li>Batching: FlowFiles are pulled until the queue is empty, a FlowFile resolves to a
 *       different URL (it is transferred back and the loop stops), only a single FlowFile may be
 *       sent (not sending as FlowFile, or neither V3 nor V2 is accepted), or the accumulated size
 *       exceeds {@code MAX_BATCH_SIZE}. FlowFiles whose URL is malformed are penalized and routed
 *       to failure.
 *   <li>On the first accepted FlowFile an HTTP client is built and the destination's accepted
 *       formats are looked up (cached on the {@code Config}, otherwise queried).
 *   <li>The batch is streamed as the POST body, optionally throttled, gzipped and packaged.
 *   <li>A 303 response with the hold-intent header yields a Hold URI, which is then DELETEd;
 *       the DELETE is retried on I/O errors until it succeeds or the processor is unscheduled.
 * </ol>
 *
 * @param context source of processor properties; also used to yield
 * @param session session the FlowFiles are read from and transferred on
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
  final boolean sendAsFlowFile = context.getProperty(SEND_AS_FLOWFILE).asBoolean();
  final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
  final String userAgent = context.getProperty(USER_AGENT).getValue();

  // DATA_TIMEOUT is used for both the connection-request timeout and the socket timeout.
  // Redirects are disabled, so a 303 reaches the code below instead of being followed.
  final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
  requestConfigBuilder.setConnectionRequestTimeout(
      context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
  requestConfigBuilder.setConnectTimeout(
      context.getProperty(CONNECTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
  requestConfigBuilder.setRedirectsEnabled(false);
  requestConfigBuilder.setSocketTimeout(
      context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
  final RequestConfig requestConfig = requestConfigBuilder.build();

  final StreamThrottler throttler = throttlerRef.get();
  final ProcessorLog logger = getLogger();

  final Double maxBatchBytes = context.getProperty(MAX_BATCH_SIZE).asDataSize(DataUnit.B);
  String lastUrl = null;
  long bytesToSend = 0L;

  final List<FlowFile> toSend = new ArrayList<>();
  DestinationAccepts destinationAccepts = null;
  CloseableHttpClient client = null;
  final String transactionId = UUID.randomUUID().toString();

  // Filled in by the response interceptor with the peer certificate's subject DN; stays "none"
  // when the interceptor never sets it.
  final ObjectHolder<String> dnHolder = new ObjectHolder<>("none");
  while (true) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
      break;
    }

    final String url = context.getProperty(URL).evaluateAttributeExpressions(flowFile).getValue();
    try {
      // Constructed only to validate the URL syntax; the instance itself is discarded.
      new java.net.URL(url);
    } catch (final MalformedURLException e) {
      logger.error(
          "After substituting attribute values for {}, URL is {}; this is not a valid URL, so routing to failure",
          new Object[] {flowFile, url});
      flowFile = session.penalize(flowFile);
      session.transfer(flowFile, REL_FAILURE);
      continue;
    }

    // If this FlowFile doesn't have the same url, throw it back on the queue and stop grabbing
    // FlowFiles
    if (lastUrl != null && !lastUrl.equals(url)) {
      session.transfer(flowFile);
      break;
    }

    lastUrl = url;
    toSend.add(flowFile);

    // Runs for the first FlowFile added to the batch: build the client and learn what the
    // destination accepts.
    if (client == null || destinationAccepts == null) {
      final Config config = getConfig(url, context);
      final HttpClientConnectionManager conMan = config.getConnectionManager();

      final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
      clientBuilder.setConnectionManager(conMan);
      clientBuilder.setUserAgent(userAgent);
      // Captures the subject DN of the first peer certificate of each response's SSL session.
      clientBuilder.addInterceptorFirst(
          new HttpResponseInterceptor() {
            @Override
            public void process(final HttpResponse response, final HttpContext httpContext)
                throws HttpException, IOException {
              HttpCoreContext coreContext = HttpCoreContext.adapt(httpContext);
              ManagedHttpClientConnection conn =
                  coreContext.getConnection(ManagedHttpClientConnection.class);
              if (!conn.isOpen()) {
                return;
              }

              SSLSession sslSession = conn.getSSLSession();

              if (sslSession != null) {
                final X509Certificate[] certChain = sslSession.getPeerCertificateChain();
                if (certChain == null || certChain.length == 0) {
                  throw new SSLPeerUnverifiedException("No certificates found");
                }

                final X509Certificate cert = certChain[0];
                dnHolder.set(cert.getSubjectDN().getName().trim());
              }
            }
          });

      clientBuilder.disableAutomaticRetries();
      clientBuilder.disableContentCompression();

      final String username = context.getProperty(USERNAME).getValue();
      final String password = context.getProperty(PASSWORD).getValue();
      // set the credentials if appropriate
      if (username != null) {
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        if (password == null) {
          credentialsProvider.setCredentials(
              AuthScope.ANY, new UsernamePasswordCredentials(username));
        } else {
          credentialsProvider.setCredentials(
              AuthScope.ANY, new UsernamePasswordCredentials(username, password));
        }
        clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
      }
      client = clientBuilder.build();

      // determine whether or not destination accepts flowfile/gzip
      destinationAccepts = config.getDestinationAccepts();
      if (destinationAccepts == null) {
        try {
          if (sendAsFlowFile) {
            destinationAccepts = getDestinationAcceptance(client, url, getLogger(), transactionId);
          } else {
            // Not sending as FlowFile: record that nothing is accepted, without querying.
            destinationAccepts = new DestinationAccepts(false, false, false, false, null);
          }
          // Cache the answer on the Config so later invocations can skip the lookup.
          config.setDestinationAccepts(destinationAccepts);
        } catch (IOException e) {
          flowFile = session.penalize(flowFile);
          session.transfer(flowFile, REL_FAILURE);
          logger.error(
              "Unable to communicate with destination {} to determine whether or not it can accept "
                  + "flowfiles/gzip; routing {} to failure due to {}",
              new Object[] {url, flowFile, e});
          context.yield();
          return;
        }
      }
    }

    // if we are not sending as flowfile, or if the destination doesn't accept V3 or V2
    // (streaming) format,
    // then only use a single FlowFile
    if (!sendAsFlowFile
        || (!destinationAccepts.isFlowFileV3Accepted()
            && !destinationAccepts.isFlowFileV2Accepted())) {
      break;
    }

    // The size check happens after the FlowFile was added, so the batch may exceed the limit by
    // the FlowFile that crosses it.
    bytesToSend += flowFile.getSize();
    if (bytesToSend > maxBatchBytes.longValue()) {
      break;
    }
  }

  if (toSend.isEmpty()) {
    return;
  }

  final String url = lastUrl;
  final HttpPost post = new HttpPost(url);
  final List<FlowFile> flowFileList = toSend;
  final DestinationAccepts accepts = destinationAccepts;
  // A destination that reports no protocol version is treated as legacy NiFi.
  final boolean isDestinationLegacyNiFi = accepts.getProtocolVersion() == null;

  // The body is produced on demand when the POST is written: every FlowFile in the batch is read
  // from the session and copied or packaged into the (optionally throttled/gzipped) stream.
  final EntityTemplate entity =
      new EntityTemplate(
          new ContentProducer() {
            @Override
            public void writeTo(final OutputStream rawOut) throws IOException {
              final OutputStream throttled =
                  (throttler == null) ? rawOut : throttler.newThrottledOutputStream(rawOut);
              OutputStream wrappedOut = new BufferedOutputStream(throttled);
              if (compressionLevel > 0 && accepts.isGzipAccepted()) {
                // NOTE(review): if GZIPOutputStream here is java.util.zip's, the second argument
                // is a buffer size, not a compression level — confirm which class is imported.
                wrappedOut = new GZIPOutputStream(wrappedOut, compressionLevel);
              }

              try (final OutputStream out = wrappedOut) {
                for (final FlowFile flowFile : flowFileList) {
                  session.read(
                      flowFile,
                      new InputStreamCallback() {
                        @Override
                        public void process(final InputStream rawIn) throws IOException {
                          try (final InputStream in = new BufferedInputStream(rawIn)) {

                            FlowFilePackager packager = null;
                            if (!sendAsFlowFile) {
                              packager = null;
                            } else if (accepts.isFlowFileV3Accepted()) {
                              packager = new FlowFilePackagerV3();
                            } else if (accepts.isFlowFileV2Accepted()) {
                              packager = new FlowFilePackagerV2();
                            } else if (accepts.isFlowFileV1Accepted()) {
                              packager = new FlowFilePackagerV1();
                            }

                            // if none of the above conditions is met, we should never get here,
                            // because we will have already verified that at least 1 of the
                            // FlowFile packaging formats is acceptable if sending as FlowFile.
                            if (packager == null) {
                              // Raw content only; no attributes are sent in the body.
                              StreamUtils.copy(in, out);
                            } else {
                              final Map<String, String> flowFileAttributes;
                              if (isDestinationLegacyNiFi) {
                                // Old versions of NiFi expect nf.file.name and nf.file.path to
                                // indicate filename & path; in order to maintain backward
                                // compatibility, we copy the filename & path to those attribute
                                // keys.
                                flowFileAttributes = new HashMap<>(flowFile.getAttributes());
                                flowFileAttributes.put(
                                    "nf.file.name",
                                    flowFile.getAttribute(CoreAttributes.FILENAME.key()));
                                flowFileAttributes.put(
                                    "nf.file.path",
                                    flowFile.getAttribute(CoreAttributes.PATH.key()));
                              } else {
                                flowFileAttributes = flowFile.getAttributes();
                              }

                              packager.packageFlowFile(
                                  in, out, flowFileAttributes, flowFile.getSize());
                            }
                          }
                        }
                      });
                }

                out.flush();
              }
            }
          });

  entity.setChunked(context.getProperty(CHUNKED_ENCODING).asBoolean());
  post.setEntity(entity);
  post.setConfig(requestConfig);

  // Content type: the best FlowFile package format the destination accepts, or the first
  // FlowFile's mime.type attribute (with a default) when sending raw content.
  final String contentType;
  if (sendAsFlowFile) {
    if (accepts.isFlowFileV3Accepted()) {
      contentType = APPLICATION_FLOW_FILE_V3;
    } else if (accepts.isFlowFileV2Accepted()) {
      contentType = APPLICATION_FLOW_FILE_V2;
    } else if (accepts.isFlowFileV1Accepted()) {
      contentType = APPLICATION_FLOW_FILE_V1;
    } else {
      logger.error(
          "Cannot send data to {} because the destination does not accept FlowFiles and this processor is "
              + "configured to deliver FlowFiles; rolling back session",
          new Object[] {url});
      session.rollback();
      context.yield();
      return;
    }
  } else {
    final String attributeValue = toSend.get(0).getAttribute(CoreAttributes.MIME_TYPE.key());
    contentType = (attributeValue == null) ? DEFAULT_CONTENT_TYPE : attributeValue;
  }

  // Attributes are copied to request headers only for a single FlowFile sent as raw content.
  final String attributeHeaderRegex = context.getProperty(ATTRIBUTES_AS_HEADERS_REGEX).getValue();
  if (attributeHeaderRegex != null && !sendAsFlowFile && flowFileList.size() == 1) {
    final Pattern pattern = Pattern.compile(attributeHeaderRegex);

    final Map<String, String> attributes = flowFileList.get(0).getAttributes();
    for (final Map.Entry<String, String> entry : attributes.entrySet()) {
      final String key = entry.getKey();
      if (pattern.matcher(key).matches()) {
        post.setHeader(entry.getKey(), entry.getValue());
      }
    }
  }

  // Set after the attribute headers, so these values replace any attribute with the same name.
  post.setHeader(CONTENT_TYPE, contentType);
  post.setHeader(FLOWFILE_CONFIRMATION_HEADER, "true");
  post.setHeader(PROTOCOL_VERSION_HEADER, PROTOCOL_VERSION);
  post.setHeader(TRANSACTION_ID_HEADER, transactionId);
  if (compressionLevel > 0 && accepts.isGzipAccepted()) {
    post.setHeader(GZIPPED_HEADER, "true");
  }

  // Do the actual POST
  final String flowFileDescription =
      toSend.size() <= 10 ? toSend.toString() : toSend.size() + " FlowFiles";

  final String uploadDataRate;
  final long uploadMillis;
  CloseableHttpResponse response = null;
  try {
    final StopWatch stopWatch = new StopWatch(true);
    response = client.execute(post);

    // consume input stream entirely, ignoring its contents. If we
    // don't do this, the Connection will not be returned to the pool
    EntityUtils.consume(response.getEntity());
    stopWatch.stop();
    uploadDataRate = stopWatch.calculateDataRate(bytesToSend);
    uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
  } catch (final IOException e) {
    logger.error(
        "Failed to Post {} due to {}; transferring to failure",
        new Object[] {flowFileDescription, e});
    context.yield();
    for (FlowFile flowFile : toSend) {
      flowFile = session.penalize(flowFile);
      session.transfer(flowFile, REL_FAILURE);
    }
    return;
  } finally {
    if (response != null) {
      try {
        response.close();
      } catch (IOException e) {
        getLogger().warn("Failed to close HTTP Response due to {}", new Object[] {e});
      }
    }
  }

  // NOTE(review): the status line and headers are read below after response.close() ran in the
  // finally block above; this relies on the response object keeping them after close — confirm.

  // If we get a 'SEE OTHER' status code and an HTTP header that indicates that the intent
  // of the Location URI is a flowfile hold, we will store this holdUri. This prevents us
  // from posting to some other webservice and then attempting to delete some resource to which
  // we are redirected
  final int responseCode = response.getStatusLine().getStatusCode();
  final String responseReason = response.getStatusLine().getReasonPhrase();
  String holdUri = null;
  if (responseCode == HttpServletResponse.SC_SEE_OTHER) {
    final Header locationUriHeader = response.getFirstHeader(LOCATION_URI_INTENT_NAME);
    if (locationUriHeader != null) {
      if (LOCATION_URI_INTENT_VALUE.equals(locationUriHeader.getValue())) {
        final Header holdUriHeader = response.getFirstHeader(LOCATION_HEADER_NAME);
        if (holdUriHeader != null) {
          holdUri = holdUriHeader.getValue();
        }
      }
    }

    // A 303 without a usable Hold URI is a failure for the whole batch.
    if (holdUri == null) {
      for (FlowFile flowFile : toSend) {
        flowFile = session.penalize(flowFile);
        logger.error(
            "Failed to Post {} to {}: sent content and received status code {}:{} but no Hold URI",
            new Object[] {flowFile, url, responseCode, responseReason});
        session.transfer(flowFile, REL_FAILURE);
      }
      return;
    }
  }

  // No hold was placed: the status code alone decides the outcome.
  if (holdUri == null) {
    if (responseCode == HttpServletResponse.SC_SERVICE_UNAVAILABLE) {
      for (FlowFile flowFile : toSend) {
        flowFile = session.penalize(flowFile);
        logger.error(
            "Failed to Post {} to {}: response code was {}:{}; will yield processing, "
                + "since the destination is temporarily unavailable",
            new Object[] {flowFile, url, responseCode, responseReason});
        session.transfer(flowFile, REL_FAILURE);
      }
      context.yield();
      return;
    }

    if (responseCode >= 300) {
      for (FlowFile flowFile : toSend) {
        flowFile = session.penalize(flowFile);
        logger.error(
            "Failed to Post {} to {}: response code was {}:{}",
            new Object[] {flowFile, url, responseCode, responseReason});
        session.transfer(flowFile, REL_FAILURE);
      }
      return;
    }

    logger.info(
        "Successfully Posted {} to {} in {} at a rate of {}",
        new Object[] {
          flowFileDescription,
          url,
          FormatUtils.formatMinutesSeconds(uploadMillis, TimeUnit.MILLISECONDS),
          uploadDataRate
        });

    for (final FlowFile flowFile : toSend) {
      session
          .getProvenanceReporter()
          .send(flowFile, url, "Remote DN=" + dnHolder.get(), uploadMillis, true);
      session.transfer(flowFile, REL_SUCCESS);
    }
    return;
  }

  //
  // the response indicated a Hold URI; delete the Hold.
  //

  // determine the full URI of the Flow File's Hold; Unfortunately, the responses that are
  // returned have changed over the past, so we have to take into account a few different
  // possibilities.
  String fullHoldUri = holdUri;
  if (holdUri.startsWith("/contentListener")) {
    // If the Hold URI that we get starts with /contentListener, it may not really be
    // /contentListener, as this really indicates that it should be whatever we posted to -- if
    // posting directly to the ListenHTTP component, it will be /contentListener, but if posting
    // to a proxy/load balancer, we may be posting to some other URL.
    // substring(16) drops the 16-character "/contentListener" prefix.
    fullHoldUri = url + holdUri.substring(16);
  } else if (holdUri.startsWith("/")) {
    // URL indicates the full path but not hostname or port; use the same hostname & port that we
    // posted to but use the full path indicated by the response.
    // The search starts at index 8 so that the slashes of "http://" / "https://" are skipped.
    int firstSlash = url.indexOf("/", 8);
    if (firstSlash < 0) {
      firstSlash = url.length();
    }
    final String beforeSlash = url.substring(0, firstSlash);
    fullHoldUri = beforeSlash + holdUri;
  } else if (!holdUri.startsWith("http")) {
    // Relative Hold URI: append it to the URL we posted to. A Hold URI that starts with "http"
    // is used unchanged.
    fullHoldUri = url + (url.endsWith("/") ? "" : "/") + holdUri;
  }

  final HttpDelete delete = new HttpDelete(fullHoldUri);
  delete.setHeader(TRANSACTION_ID_HEADER, transactionId);

  // Retry the DELETE on I/O errors until it gets a response or the processor is unscheduled.
  // NOTE(review): no delay between attempts is visible in this method.
  while (true) {
    try {
      final HttpResponse holdResponse = client.execute(delete);
      EntityUtils.consume(holdResponse.getEntity());
      final int holdStatusCode = holdResponse.getStatusLine().getStatusCode();
      final String holdReason = holdResponse.getStatusLine().getReasonPhrase();
      if (holdStatusCode >= 300) {
        logger.error(
            "Failed to delete Hold that destination placed on {}: got response code {}:{}; routing to failure",
            new Object[] {flowFileDescription, holdStatusCode, holdReason});

        for (FlowFile flowFile : toSend) {
          flowFile = session.penalize(flowFile);
          session.transfer(flowFile, REL_FAILURE);
        }
        return;
      }

      logger.info(
          "Successfully Posted {} to {} in {} milliseconds at a rate of {}",
          new Object[] {flowFileDescription, url, uploadMillis, uploadDataRate});

      for (FlowFile flowFile : toSend) {
        session.getProvenanceReporter().send(flowFile, url);
        session.transfer(flowFile, REL_SUCCESS);
      }
      return;
    } catch (final IOException e) {
      logger.warn(
          "Failed to delete Hold that destination placed on {} due to {}",
          new Object[] {flowFileDescription, e});
    }

    if (!isScheduled()) {
      context.yield();
      logger.warn(
          "Failed to delete Hold that destination placed on {}; Processor has been stopped so routing FlowFile(s) to failure",
          new Object[] {flowFileDescription});
      for (FlowFile flowFile : toSend) {
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
      }
      return;
    }
  }
}
@Override public boolean authenticate() { if (!super.authenticate()) { LOG.error( String.format( "blank username or password detected, no %s xword will be downloaded", this.getType())); return false; } final HttpUriRequest loginGet = RequestBuilder.get().setUri(NYT_LOGIN_URL).build(); final String loginPage; try (final CloseableHttpResponse getResponse = this.getHttpClient().execute(loginGet)) { loginPage = EntityUtils.toString(getResponse.getEntity()); } catch (final IOException e) { LOG.error("error while navigating to NYT login page", e); return false; } final String token; final String expires; try { final TagNode node = this.getCleaner().clean(loginPage); final Object[] foundNodes = node.evaluateXPath("//input[@name='token']"); if (foundNodes.length != 1) { this.throwLoginException( "unexpected login page, found %d hidden token input elements, expected 1", foundNodes.length); } final TagNode hiddenTokenInput = (TagNode) foundNodes[0]; token = hiddenTokenInput.getAttributeByName("value"); LOG.debug("found hidden input token {}", token); final Object[] foundExpiresNodes = node.evaluateXPath("//input[@name='expires']"); if (foundExpiresNodes.length != 1) { this.throwLoginException( "unexpected login page, found %d hidden token expiration input elements, expected 1", foundNodes.length); } final TagNode hiddenTokenExpiresInput = (TagNode) foundExpiresNodes[0]; expires = hiddenTokenExpiresInput.getAttributeByName("value"); LOG.debug("found hidden input token expiration {}", expires); } catch (LoginException | XPatherException e) { LOG.error("error while pulling login tokens from NYT login page", e); return false; } // @formatter:off final HttpUriRequest loginPost = RequestBuilder.post() .setUri("https://myaccount.nytimes.com/auth/login") .addParameter("is_continue", Boolean.FALSE.toString()) .addParameter("token", token) .addParameter("expires", expires) .addParameter("userid", this.getLoginInfo().getUsername()) .addParameter("password", 
this.getLoginInfo().getPassword()) .addParameter("remember", Boolean.TRUE.toString()) .build(); // @formatter:on try (CloseableHttpResponse postResponse = this.getHttpClient().execute(loginPost)) { // successful NYT login should give 302 status final int responseStatus = postResponse.getStatusLine().getStatusCode(); if (responseStatus != 302) { final String errorMessage = String.format("did not detect expected 302 redirect, got %d instead", responseStatus); throw new LoginException(errorMessage); } // successful NYT login redirects to the NYT homepage final Header location = postResponse.getFirstHeader("Location"); // have seen this redirect both with and without the final portion final Pattern expectedRedirectLocation = Pattern.compile("http://www.nytimes.com(\\?login=email)*"); final String actualRedirectLocation = location.getValue(); final Matcher matcher = expectedRedirectLocation.matcher(actualRedirectLocation); if (!matcher.matches()) { final String errorMessage = String.format( "redirect to unexpected URL, expected %s, found Location=%s instead", expectedRedirectLocation, actualRedirectLocation); throw new LoginException(errorMessage); } // successful NYT login should set a few cookies final Header[] cookies = postResponse.getHeaders("Set-Cookie"); if (cookies.length < 1) { throw new LoginException("no post login cookies set, login likely failed"); } } catch (final IOException | LoginException e) { LOG.error("error while logging in, e={}", e.getMessage()); return false; } LOG.info("successfully logged in to nyt"); return true; }
/** * Downloads to a directory represented by a {@link File} object, determining the file name from * the Content-Disposition header. * * @param url URL of file * @param base base directory in which the download is saved * @return the absolute file path of the downloaded file, or an empty string if the file could not * be downloaded */ public String download(String url, File base) { if (mHTTPClient == null) { mHTTPClient = createHTTPClient(mPrivateKey, mCertificate, mValidateCertificate); if (mHTTPClient == null) return ""; } LOGGER.info("downloading file from URL=" + url + "..."); mCurrentRequest = new HttpGet(url); // execute request CloseableHttpResponse response; try { response = mHTTPClient.execute(mCurrentRequest); } catch (IOException ex) { LOGGER.log(Level.WARNING, "can't execute request", ex); return ""; } try { int code = response.getStatusLine().getStatusCode(); // HTTP/1.1 200 OK -- other codes should throw Exceptions if (code != 200) { LOGGER.warning("invalid response code: " + code); return ""; } // get filename Header dispHeader = response.getFirstHeader("Content-Disposition"); if (dispHeader == null) { LOGGER.warning("no content header"); return ""; } String filename = parseContentDisposition(dispHeader.getValue()); // never trust incoming data filename = filename != null ? new File(filename).getName() : ""; if (filename.isEmpty()) { LOGGER.warning("no filename in content: " + dispHeader.getValue()); return ""; } // get file size long s = -1; Header lengthHeader = response.getFirstHeader("Content-Length"); if (lengthHeader == null) { LOGGER.warning("no length header"); } else { try { s = Long.parseLong(lengthHeader.getValue()); } catch (NumberFormatException ex) { LOGGER.log(Level.WARNING, "can' parse file size", ex); } } final long fileSize = s; mListener.updateProgress(s < 0 ? 
-2 : 0); // TODO should check for content-disposition parsing here // and choose another filename if necessary HttpEntity entity = response.getEntity(); if (entity == null) { LOGGER.warning("no entity in response"); return ""; } File destination = new File(base, filename); if (destination.exists()) { LOGGER.warning("file already exists: " + destination.getAbsolutePath()); return ""; } try (FileOutputStream out = new FileOutputStream(destination)) { CountingOutputStream cOut = new CountingOutputStream(out) { @Override protected synchronized void afterWrite(int n) { if (fileSize <= 0) return; // inform listener mListener.updateProgress((int) (this.getByteCount() / (fileSize * 1.0) * 100)); } }; entity.writeTo(cOut); } catch (IOException ex) { LOGGER.log(Level.WARNING, "can't download file", ex); return ""; } LOGGER.info("... download successful!"); return destination.getAbsolutePath(); } finally { try { response.close(); } catch (IOException ex) { LOGGER.log(Level.WARNING, "can't close response", ex); } } }
private static void assert404NotFound(CloseableHttpResponse res) { assertStatusLine(res, "HTTP/1.1 404 Not Found"); // Ensure that the 'Last-Modified' header does not exist. assertThat(res.getFirstHeader(HttpHeaders.LAST_MODIFIED), is(nullValue())); }