/**
 * Compares two URLs while ignoring their reference (fragment) part.
 *
 * @param url1 the first URL
 * @param url2 the second URL
 * @return {@code true} if host, protocol, port, file and user info are all equal
 */
public static boolean sameNoRefURL(URL url1, URL url2) {
  if (!Objects.equals(url1.getHost(), url2.getHost())) {
    return false;
  }
  if (!Objects.equals(url1.getProtocol(), url2.getProtocol())) {
    return false;
  }
  if (url1.getPort() != url2.getPort()) {
    return false;
  }
  if (!Objects.equals(url1.getFile(), url2.getFile())) {
    return false;
  }
  return Objects.equals(url1.getUserInfo(), url2.getUserInfo());
}
private String getPolitenessKey(URL u) { String key = null; if (QUEUE_MODE_IP.equalsIgnoreCase(queueMode)) { try { final InetAddress addr = InetAddress.getByName(u.getHost()); key = addr.getHostAddress(); } catch (final UnknownHostException e) { // unable to resolve it, so don't fall back to host name LOG.warn("Unable to resolve: {}, skipping.", u.getHost()); return null; } } else if (QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) { key = PaidLevelDomain.getPLD(u.getHost()); if (key == null) { LOG.warn("Unknown domain for url: {}, using hostname as key", u.toExternalForm()); key = u.getHost(); } } else { key = u.getHost(); if (key == null) { LOG.warn("Unknown host for url: {}, using URL string as key", u.toExternalForm()); key = u.toExternalForm(); } } return key.toLowerCase(Locale.ROOT); }
/** * to send Head request to the server to check for content type and content size * * @param current_url * @return */ public boolean is_valid_file(String current_url) throws IOException { // System.out.print("[Output from log4j] Checking validity for url +" + current_url); // create and send HEAD request // // // // System.out.println("[Output from log4j] before head request + " + current_url); this.response_headers = crawler_client.fetch_head_response_url(current_url); // // // // System.out.println("[Output from log4j] after head request + " + // response_headers.size()); // // // // System.out.println("[Output from log4j] After fetching response header in // is_valid_file"); // base case - no response headers recieved from the server if (this.response_headers == null) return false; // check for location header if (this.response_headers.containsKey("location")) { this.is_reloc = true; String re_loc = this.response_headers.get("location"); if (re_loc.startsWith("/")) { URL url_obj = new URL(current_url); String path = url_obj.getPath(); String abs_reloc; if (path.endsWith(".xml") || path.endsWith(".html") || path.endsWith("htm")) path = path.substring(0, path.lastIndexOf("/")); if (path.endsWith("/")) abs_reloc = url_obj.getProtocol() + "://" + url_obj.getHost() + path.substring(0, path.length() - 1) + re_loc; else abs_reloc = url_obj.getProtocol() + "://" + url_obj.getHost() + path + re_loc; System.err.println("[Output from log4j] Found Relocation url +" + abs_reloc); WebURLQueue queue = new WebURLQueue(); queue.addToQueue(abs_reloc); return false; } else { WebURLQueue queue = new WebURLQueue(); queue.addToQueue(re_loc); return false; } } // if content type is not present in response header if (!(this.response_headers.containsKey("content-type"))) return false; // check valid content types this.content_type = this.response_headers.get("content-type"); // if valid content type if (!content_type.equals("text/xml") && !content_type.equals("text/html") && 
!content_type.endsWith("+xml") && !content_type.equals("application/xml")) return false; // // // // System.out.println("[Output from log4j] Chheck till Content type"); // check content-length exists if (this.response_headers.containsKey("content-length")) { this.content_length = Double.parseDouble(this.response_headers.get("content-length")); // checking allowed content-length for the document if (this.content_length > (XPathCrawler.maxsize_doc * 1024 * 1024)) return false; } // // // // System.out.println("[Output from log4j] Chheck till Content Length"); return true; }
@Override public boolean isValidAuthority(URL authorizationEndpoint) { // For comparison purposes, convert to lowercase Locale.US // getProtocol returns scheme and it is available if it is absolute url // Authority is in the form of https://Instance/tenant/somepath if (authorizationEndpoint != null && !StringExtensions.IsNullOrBlank(authorizationEndpoint.getHost()) && authorizationEndpoint.getProtocol().equals("https") && StringExtensions.IsNullOrBlank(authorizationEndpoint.getQuery()) && StringExtensions.IsNullOrBlank(authorizationEndpoint.getRef()) && !StringExtensions.IsNullOrBlank(authorizationEndpoint.getPath())) { if (UrlExtensions.isADFSAuthority(authorizationEndpoint)) { throw new AuthenticationException(ADALError.DISCOVERY_NOT_SUPPORTED); } else if (sValidHosts.contains(authorizationEndpoint.getHost().toLowerCase(Locale.US))) { // host can be the instance or inside the validated list. // Valid hosts will help to skip validation if validated before // call Callback and skip the look up return true; } else { // Only query from Prod instance for now, not all of the // instances in the list return queryInstance(authorizationEndpoint); } } return false; }
public File downloadFile(URL url) throws IOException { String inputLine; BufferedReader br = null; String fileName = docs + url.getHost() + url.getPath(); File f = new File(new File(fileName).getParent()); f.mkdirs(); File fileToSave = new File(docs + url.getHost() + url.getFile()); if (!fileToSave.exists()) { fileToSave.createNewFile(); } FileWriter fw = new FileWriter(fileToSave.getAbsoluteFile()); BufferedWriter bw = new BufferedWriter(fw); // StringBuilder sb = new StringBuilder(); try { URLConnection uConn = url.openConnection(); br = new BufferedReader(new InputStreamReader(uConn.getInputStream())); while ((inputLine = br.readLine()) != null) { bw.write(inputLine); } } catch (IOException e) { System.out.println("URL does not exist"); return null; } finally { br.close(); bw.close(); } return fileToSave; }
@Test public void testGetURLPrincipal() throws KettleDatabaseException, MalformedURLException { String testHostname = "testHostname"; int port = 9429; String testDbName = "testDbName"; impalaDatabaseMeta.getAttributes().put("principal", "testP"); String urlString = impalaDatabaseMeta.getURL(testHostname, "" + port, testDbName); assertTrue(urlString.startsWith(ImpalaDatabaseMeta.URL_PREFIX)); // Use known prefix urlString = "http://" + urlString.substring(ImpalaDatabaseMeta.URL_PREFIX.length()); URL url = new URL(urlString); assertEquals(testHostname, url.getHost()); assertEquals(port, url.getPort()); assertEquals("/" + testDbName, url.getPath()); impalaDatabaseMeta.getAttributes().remove("principal"); impalaDatabaseMeta .getAttributes() .put( ImpalaDatabaseMeta.ATTRIBUTE_PREFIX_EXTRA_OPTION + impalaDatabaseMeta.getPluginId() + ".principal", "testP"); urlString = impalaDatabaseMeta.getURL(testHostname, "" + port, testDbName); assertTrue(urlString.startsWith(ImpalaDatabaseMeta.URL_PREFIX)); // Use known prefix urlString = "http://" + urlString.substring(ImpalaDatabaseMeta.URL_PREFIX.length()); url = new URL(urlString); assertEquals(testHostname, url.getHost()); assertEquals(port, url.getPort()); assertEquals("/" + testDbName, url.getPath()); }
/**
 * Tells whether a target URL is covered by any of the given filter entries.
 *
 * <p>An entry without a slash is compared against the target host only. Any other entry is
 * parsed as a URL (with "http://" prepended when it has no scheme, and lower-cased); it matches
 * when the host check succeeds and the target path starts with the entry's path. Entries that
 * cannot be parsed are skipped.
 *
 * @param target URL to check
 * @param strings the entries to check against
 * @return {@code true} if the target is covered by any entry
 * @throws MalformedURLException if {@code target} is not a valid URL
 */
public static boolean isDomain(String target, String[] strings) throws MalformedURLException {
  final URL targetUrl = new URL(target);
  for (String entry : strings) {
    // Host-only entry: nothing but the host needs to match.
    if (!entry.contains("/")) {
      if (ContentType.hostContains(targetUrl.getHost(), entry)) {
        return true;
      }
      continue;
    }

    final String spec = entry.contains("://") ? entry : "http://" + entry;
    final URL comparison;
    try {
      comparison = new URL(spec.toLowerCase());
    } catch (MalformedURLException ignored) {
      // An unparsable entry simply cannot match.
      continue;
    }

    if (ContentType.hostContains(targetUrl.getHost(), comparison.getHost())
        && targetUrl.getPath().startsWith(comparison.getPath())) {
      return true;
    }
  }
  return false;
}
private String translateLocationUrl( URL locationUrl, URL proxiedHostUrl, URL requestedHost, String requestedContext, String proxiedRootPath) { StringBuilder buffer = new StringBuilder(); if (locationUrl == null) { return null; } if (StringUtilities.isEmpty(locationUrl.getHost())) { return requestedContext; } if (shouldRewriteLocation(locationUrl, proxiedHostUrl, requestedHost)) { // location header contains our host info buffer.append(requestedHost.getProtocol()).append("://").append(requestedHost.getHost()); if (requestedHost.getPort() != DEFAULT_HTTP_PORT) { buffer.append(":").append(requestedHost.getPort()); } buffer.append(fixPathPrefix(locationUrl.getFile(), requestedContext, proxiedRootPath)); } return buffer.length() == 0 ? locationUrl.toExternalForm() : buffer.toString(); }
/**
 * Obtains a reference to the RMI registry addressed by the URL's host and, when one is given,
 * its port.
 *
 * @param u the URL naming the registry host (and optionally its port)
 * @return the registry reference
 * @throws RemoteException if the reference cannot be created
 */
private static Registry getRegistry(URL u) throws RemoteException {
  final String host = u.getHost();
  final int port = u.getPort();
  // A port of -1 means the URL did not specify one; use the host-only lookup in that case.
  return port == -1
      ? LocateRegistry.getRegistry(host)
      : LocateRegistry.getRegistry(host, port);
}
/**
 * Opens a socket to the host of the given URL, sends an HTTP/1.1 HEAD request and reads the
 * response.
 *
 * <p>{@code redirect} is set when the status code is 301 or 302. The rest of the response is
 * read to the end of the stream and discarded. The socket is left open on success and closed
 * when the exchange fails.
 *
 * @param urlsString the URL to probe; port 80 is used when the URL has no explicit port
 * @throws IOException if the URL is malformed, the connection fails, or the server answers with
 *     no status line or a malformed one
 */
public HttpClient(String urlsString) throws IOException {
  URL url = new URL(urlsString);
  int port = url.getPort();
  // A URL such as "http://host" has an empty path, but the request line needs at least "/".
  String urlPath = url.getPath().isEmpty() ? "/" : url.getPath();
  s = new Socket(url.getHost(), port == -1 ? 80 : port);
  try {
    output = s.getOutputStream();
    PrintWriter pw = new PrintWriter(output, false);
    // HTTP requires CRLF line terminators; println() would emit the platform separator.
    pw.print("HEAD " + urlPath + " HTTP/1.1\r\n");
    pw.print("Host: " + url.getHost() + "\r\n");
    pw.print("User-Agent: " + CrawlerMain.USER_AGENT + "\r\n");
    pw.print("Accept-Language: en-US,en;q=1.0\r\n");
    pw.print("Accept: " + ACCEPT + "\r\n");
    pw.print("Connection: close\r\n");
    pw.print("\r\n");
    pw.flush();

    inputStream = s.getInputStream();
    bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
    String firstLine = bufferedReader.readLine();
    if (firstLine == null) {
      throw new IOException("No response received from " + urlsString);
    }
    String[] statusParts = firstLine.split(" ");
    if (statusParts.length < 2) {
      throw new IOException("Malformed status line from " + urlsString + ": " + firstLine);
    }
    String statusCode = statusParts[1];
    if (statusCode.equals("301") || statusCode.equals("302")) {
      redirect = true;
    }

    // Drain the remaining response; its content is not used here.
    while (bufferedReader.readLine() != null) {
      // intentionally empty
    }
  } catch (IOException e) {
    // The object is never handed to the caller on failure, so release the socket here.
    try {
      s.close();
    } catch (IOException ignored) {
      // The original failure is the one worth reporting.
    }
    throw e;
  }
}
/**
 * Opens a connection through OkHttp and, for https URLs whose host has an entry in {@code
 * socketFactoryMap}, installs that host's SSL socket factory on the connection.
 *
 * @param url the URL to connect to
 * @return the opened connection
 * @throws IOException declared by the overridden method
 */
@Override
protected HttpURLConnection createConnection(URL url) throws IOException {
  final boolean useCustomFactory =
      "https".equals(url.getProtocol()) && socketFactoryMap.containsKey(url.getHost());
  final HttpURLConnection connection = new OkUrlFactory(okHttpClient).open(url);
  if (useCustomFactory) {
    ((HttpsURLConnection) connection).setSSLSocketFactory(socketFactoryMap.get(url.getHost()));
  }
  return connection;
}
/* this code is workaround for subtle bug/feature in JDK1.3.1 and 1.4, related to loading applets behind proxy */ protected PermissionCollection getPermissions(CodeSource codesource) { PermissionCollection sysPerms = null; Policy policy = (Policy) AccessController.doPrivileged( new PrivilegedAction() { public Object run() { return Policy.getPolicy(); } }); if (policy != null) sysPerms = policy.getPermissions(new CodeSource(null, null)); else sysPerms = new Permissions(); final PermissionCollection perms = sysPerms; if (base != null && base.getHost() != null) perms.add(new SocketPermission(base.getHost() + ":1-", "accept,connect,resolve")); URL url = codesource.getLocation(); if (url.getProtocol().equals("file")) { String path = url.getFile().replace('/', File.separatorChar); if (!path.endsWith(File.separator)) { int endIndex = path.lastIndexOf(File.separatorChar); if (endIndex != -1) { path = path.substring(0, endIndex + 1) + "-"; perms.add(new FilePermission(path, "read")); } } perms.add(new SocketPermission("localhost", "connect,accept")); AccessController.doPrivileged( new PrivilegedAction() { public Object run() { try { String host = InetAddress.getLocalHost().getHostName(); perms.add(new SocketPermission(host, "connect,accept")); } catch (UnknownHostException uhe) { } return null; } }); if (base.getProtocol().equals("file")) { String bpath = base.getFile().replace('/', File.separatorChar); if (bpath.endsWith(File.separator)) { bpath += "-"; } perms.add(new FilePermission(bpath, "read")); } } // for (Enumeration e=perms.elements();e.hasMoreElements();) // System.err.println("p="+e.nextElement()); return perms; }
/**
 * Fetches the document behind {@code baseUrl} and parses it with {@code loader.parseInfo}.
 *
 * <p>When the URL is http(s) and the configuration reports its host as a local-host forward, the
 * request goes to "localhost" through {@link HttpURLConnection} with the original host in the
 * Host header (optionally downgrading https to http). Otherwise the request is executed through
 * the configured HTTP client and any status outside 200-299 raises an {@link IOException}.
 *
 * @param baseUrl the base URL handed to the loader to build the request URL
 * @param context the rendering context supplying the configuration and request headers
 * @return the parsed result
 */
private T requestInfo(URI baseUrl, RenderingContext context)
    throws IOException, URISyntaxException, ParserConfigurationException, SAXException {
  final URL url = loader.createURL(baseUrl, context);
  GetMethod method = null;
  try {
    final String protocol = url.getProtocol();
    final boolean isHttps = protocol.equals("https");
    final boolean isHttpFamily = protocol.equals("http") || isHttps;

    final InputStream stream;
    if (isHttpFamily && context.getConfig().localHostForwardIsFrom(url.getHost())) {
      final String originalHost = url.getHost();
      final String localScheme =
          (isHttps && context.getConfig().localHostForwardIsHttps2http()) ? "http" : protocol;
      final URL localUrl = new URL(localScheme, "localhost", url.getPort(), url.getFile());
      final HttpURLConnection connection = (HttpURLConnection) localUrl.openConnection();
      connection.setRequestProperty("Host", originalHost);
      for (Map.Entry<String, String> header : context.getHeaders().entrySet()) {
        connection.setRequestProperty(header.getKey(), header.getValue());
      }
      stream = connection.getInputStream();
    } else {
      method = new GetMethod(url.toString());
      for (Map.Entry<String, String> header : context.getHeaders().entrySet()) {
        method.setRequestHeader(header.getKey(), header.getValue());
      }
      context.getConfig().getHttpClient(baseUrl).executeMethod(method);
      final int status = method.getStatusCode();
      final boolean success = status >= 200 && status < 300;
      if (!success) {
        throw new IOException(
            "Error "
                + status
                + " while reading the Capabilities from "
                + url
                + ": "
                + method.getStatusText());
      }
      stream = method.getResponseBodyAsStream();
    }

    try {
      return loader.parseInfo(stream);
    } finally {
      stream.close();
    }
  } finally {
    if (method != null) {
      method.releaseConnection();
    }
  }
}
public ChanRipper(URL url, AbstractStorage storage) throws IOException { super(url, storage); for (ChanSite _chanSite : explicit_domains) { if (_chanSite.domains.contains(url.getHost())) { chanSite = _chanSite; generalChanSite = false; } } if (chanSite == null) { chanSite = new ChanSite(Arrays.asList(url.getHost())); } }
/**
 * Decides whether the requested redirect URI "matches" the registered one.
 *
 * <p>When both values parse as URLs with the same protocol and host, the requested redirect
 * matches if it starts with the registered redirect. In every other case (including values that
 * are not URLs, as used by some implicit clients) the two strings must be exactly equal.
 *
 * @param requestedRedirect The requested redirect URI.
 * @param redirectUri The registered redirect URI.
 * @return Whether the requested redirect URI "matches" the specified redirect URI.
 */
protected boolean redirectMatches(String requestedRedirect, String redirectUri) {
  final URL requested;
  final URL registered;
  try {
    requested = new URL(requestedRedirect);
    registered = new URL(redirectUri);
  } catch (MalformedURLException e) {
    // Not a URL: only an exact match is acceptable.
    return requestedRedirect.equals(redirectUri);
  }

  final boolean sameSchemeAndHost =
      registered.getProtocol().equals(requested.getProtocol())
          && registered.getHost().equals(requested.getHost());
  return sameSchemeAndHost
      ? requestedRedirect.startsWith(redirectUri)
      : requestedRedirect.equals(redirectUri);
}
/** * Checks the URL to see if this repository refers to an external repository * * @param originalRepository * @return true if external. */ static boolean isExternalRepo(ArtifactRepository originalRepository) { try { URL url = new URL(originalRepository.getUrl()); return !(url.getHost().equals("localhost") || url.getHost().equals("127.0.0.1") || url.getProtocol().equals("file")); } catch (MalformedURLException e) { // bad url just skip it here. It should have been validated already, but the wagon lookup will // deal with it return false; } }
/**
 * Converts an href found in a document into an absolute URL string.
 *
 * <p>Hrefs that already start with "http://" or "https://" are returned unchanged. All others
 * are resolved against {@code baseURL} with standard URL resolution, so the base port is kept
 * and a relative href replaces the last segment of the base path instead of being appended
 * after it.
 *
 * @param baseURL the URL of the document containing the href
 * @param href the href to convert; may be {@code null}
 * @return the absolute URL string; {@code null} for a {@code null} href; the href unchanged when
 *     it cannot be resolved (for example an unknown scheme)
 */
public static String convertHref(URL baseURL, String href) {
  if (href == null) {
    return null;
  }
  if (href.startsWith("http://") || href.startsWith("https://")) {
    return href;
  }
  try {
    return new URL(baseURL, href).toExternalForm();
  } catch (java.net.MalformedURLException e) {
    // Not resolvable as a URL (e.g. "javascript:..."); hand it back untouched.
    return href;
  }
}
/**
 * Decides whether a Location URL has to be rewritten.
 *
 * @param locationUrl the URL from the Location header
 * @param proxiedHostUrl the proxied host; {@code null} always results in a rewrite
 * @param requestedHost the host the client addressed
 * @return {@code true} when there is no proxied host, or when the location's host and port
 *     (as returned by {@code getPort}) equal those of the proxied host or of the requested host
 */
private boolean shouldRewriteLocation(URL locationUrl, URL proxiedHostUrl, URL requestedHost) {
  if (proxiedHostUrl == null) {
    return true;
  }

  final String locationHost = locationUrl.getHost();
  final boolean pointsAtProxiedHost =
      locationHost.equals(proxiedHostUrl.getHost())
          && getPort(locationUrl) == getPort(proxiedHostUrl);
  if (pointsAtProxiedHost) {
    return true;
  }

  return locationHost.equals(requestedHost.getHost())
      && getPort(locationUrl) == getPort(requestedHost);
}
/**
 * Rejects URLs that are considered insecure.
 *
 * <p>A URL is accepted when its protocol is https, its host ends with "corp.google.com", its
 * host starts with "192.168.0", or its host starts with "127.0.0" on a test device. A URL that
 * cannot be parsed is only logged.
 *
 * @param url the URL string to check
 * @throws RuntimeException if the URL parses and none of the accepted cases applies
 */
private static void checkUrlIsSecure(String url) {
  final URL parsed;
  try {
    parsed = new URL(url);
  } catch (MalformedURLException e) {
    PlayCommonLog.d("Cannot parse URL: " + url, new Object[0]);
    return;
  }

  if (parsed.getProtocol().toLowerCase().equals("https")) {
    return;
  }
  if (parsed.getHost().toLowerCase().endsWith("corp.google.com")) {
    return;
  }
  if (parsed.getHost().startsWith("192.168.0")) {
    return;
  }
  if (parsed.getHost().startsWith("127.0.0") && PlayUtils.isTestDevice()) {
    return;
  }
  throw new RuntimeException("Insecure URL: " + url);
}
/**
 * Extracts one part of a URL string.
 *
 * <p>The parsed URL is cached in {@code url} and reused while the same string as {@code
 * lastUrlStr} is passed in again.
 *
 * @param urlStr the URL string to inspect
 * @param partToExtract one of HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO
 * @return the requested part, or {@code null} when an argument is {@code null}, the URL cannot
 *     be parsed, or the part name is not recognised
 */
public String evaluate(String urlStr, String partToExtract) {
  if (urlStr == null || partToExtract == null) {
    return null;
  }

  final boolean needsParsing = lastUrlStr == null || !urlStr.equals(lastUrlStr);
  if (needsParsing) {
    try {
      url = new URL(urlStr);
    } catch (Exception e) {
      return null;
    }
  }
  lastUrlStr = urlStr;

  switch (partToExtract) {
    case "HOST":
      return url.getHost();
    case "PATH":
      return url.getPath();
    case "QUERY":
      return url.getQuery();
    case "REF":
      return url.getRef();
    case "PROTOCOL":
      return url.getProtocol();
    case "FILE":
      return url.getFile();
    case "AUTHORITY":
      return url.getAuthority();
    case "USERINFO":
      return url.getUserInfo();
    default:
      return null;
  }
}
@Override public String getConfirmationMessage(URL url) { if (url != null) { String urlString = url.toExternalForm(); if (urlString.matches(PATTERN_OSM_API_URL)) { // TODO: proper i18n after stabilization String message = "<ul><li>" + tr("OSM Server URL:") + " " + url.getHost() + "</li><li>" + tr("Command") + ": " + url.getPath() + "</li>"; if (url.getQuery() != null) { message += "<li>" + tr("Request details: {0}", url.getQuery().replaceAll(",\\s*", ", ")) + "</li>"; } message += "</ul>"; return message; } // TODO: other APIs } return null; }
public static void setCookie(URL url, String s) { Cookie cookie = new Cookie(); String remaining = s.trim(); while (remaining.length() > 0) { int index = remaining.indexOf('='); if (index < 0) { cookie.set(remaining, null); break; } String key = remaining.substring(0, index); remaining = remaining.substring(index + 1); index = remaining.indexOf(';'); String value; if (index < 0) { value = remaining; cookie.set(key, value); break; } else { value = remaining.substring(0, index); cookie.set(key, value); remaining = remaining.substring(index + 1).trim(); } } if (cookie.domain == null) cookie.domain = url.getHost(); if (cookie.path == null) { // URL.getPath() is only available in Java 1.3! String file = url.getFile(); int index = file.lastIndexOf('?'); if (index >= 0) cookie.path = file.substring(0, index); else cookie.path = file; } if (cookie.isValid()) addCookie(cookie); }
/**
 * Answers a server authentication challenge with HTTP Basic credentials.
 *
 * <p>For each "Basic" challenge of the response, credentials are requested from {@link
 * java.net.Authenticator}; the first non-null answer produces a copy of the original request
 * carrying an "Authorization" header.
 *
 * <p>The previous body reused the parameters for values of unrelated types (the request was
 * stored in {@code paramResponse}, the credential string in {@code paramProxy}); each value now
 * has its own correctly typed local.
 *
 * @param paramProxy the proxy in use, passed on to {@code getConnectToInetAddress}
 * @param paramResponse the response carrying the challenges
 * @return the request to retry with credentials, or {@code null} when no Basic challenge could
 *     be answered
 */
public Request authenticate(Proxy paramProxy, Response paramResponse) {
  List<Challenge> challenges = paramResponse.challenges();
  Request request = paramResponse.request();
  URL url = request.url();
  for (Challenge challenge : challenges) {
    if (!"Basic".equalsIgnoreCase(challenge.getScheme())) {
      continue;
    }
    PasswordAuthentication auth =
        java.net.Authenticator.requestPasswordAuthentication(
            url.getHost(),
            getConnectToInetAddress(paramProxy, url),
            url.getPort(),
            url.getProtocol(),
            challenge.getRealm(),
            challenge.getScheme(),
            url,
            java.net.Authenticator.RequestorType.SERVER);
    if (auth != null) {
      String credential = Credentials.basic(auth.getUserName(), new String(auth.getPassword()));
      return request.newBuilder().header("Authorization", credential).build();
    }
  }
  return null;
}
public URLInfo(URL url) { this.url = url; this.hostName = url.getHost(); this.pathName = url.getPath(); this.protocol = url.getProtocol(); this.paramMap = this.urlParams(url.getQuery()); }
/**
 * Replaces {@code LOCALHOST_ADDRESS} with {@code LOCALHOST_HOSTNAME} in a URL whose host
 * contains that address.
 *
 * @param url the URL to fix
 * @return the same URL when its host does not contain the address; otherwise a new URL built
 *     from the external form with every occurrence of the address replaced
 * @throws MalformedURLException if the rewritten string is not a valid URL
 */
public URL fixLocalhost(URL url) throws MalformedURLException {
  if (!url.getHost().contains(LOCALHOST_ADDRESS)) {
    return url;
  }
  final String rewritten = url.toExternalForm().replace(LOCALHOST_ADDRESS, LOCALHOST_HOSTNAME);
  return new URL(rewritten);
}
/**
 * Records or clears the "AZ tracker" flag for a tracker, keyed by {@code host:port}.
 *
 * <p>The configuration parameter "Tracker Client AZ Instances" is rewritten only when the map
 * actually changed.
 *
 * @param tracker_url the tracker URL
 * @param az_tracker {@code true} to register the tracker (with the current time) when it is not
 *     known yet, {@code false} to remove a known tracker
 */
public static void setAZTracker(URL tracker_url, boolean az_tracker) {
  final String key = tracker_url.getHost() + ":" + tracker_url.getPort();
  synchronized (az_trackers) {
    final boolean known = az_trackers.get(key) != null;
    boolean changed = false;
    if (!known && az_tracker) {
      az_trackers.put(key, Long.valueOf(SystemTime.getCurrentTime()));
      changed = true;
    } else if (known && !az_tracker) {
      changed = az_trackers.remove(key) != null;
    }
    if (changed) {
      COConfigurationManager.setParameter("Tracker Client AZ Instances", az_trackers);
    }
  }
}
/** * 从网址里面抽取链接 * * @return 链接的集合 */ public static List<String> getUrlsByPage(String str) { List<String> urls = new ArrayList<String>(); try { URL url = new URL(str); int end = 0; String host = url.getHost(); Document doc = Jsoup.parse(url, 30000); Elements links = doc.select("a"); String href = null; for (Element link : links) { href = link.attr("href"); if (href.startsWith(HTTP)) { urls.add(href); } else if (href.startsWith("/")) { urls.add(HTTP + host + href); } else { if (end > 0) { urls.add(str + href); } else { urls.add(str + href); } } } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return urls; }
/**
 * Gets the base directory where class <code>c</code> resides.
 *
 * <p>The class file is located through {@link Class#getResource(String)} using the binary class
 * name, so member classes (whose class file is named {@code Outer$Inner.class}) are found as
 * well. A trailing {@code /bin} or {@code /build} directory is stripped from the result.
 *
 * @param c a class
 * @return the base directory, or {@code null} when the class file cannot be located or the URL
 *     cannot be built
 */
public static URL getBaseDir(Class<?> c) {
  try {
    // getName() yields the binary name ("Outer$Inner"), which is what the class file is called;
    // the canonical name ("Outer.Inner") would point at a resource that does not exist.
    String pathToClass = "/" + c.getName().replace('.', '/') + ".class";
    URL url = c.getResource(pathToClass);
    if (url == null) {
      return null;
    }
    String protocol = url.getProtocol().toLowerCase();
    String host = url.getHost().toLowerCase();
    String dirString = url.getFile();
    int classNameIndex = dirString.indexOf(pathToClass);
    String basePathString = dirString.substring(0, classNameIndex);
    if (basePathString.endsWith("/bin")
        || basePathString.endsWith("/bin/")
        || basePathString.endsWith("/build")
        || basePathString.endsWith("/build/")) {
      basePathString = (new File(basePathString)).getParent();
    }
    return new URL(protocol, host, basePathString);
  } catch (Exception e) {
    e.printStackTrace();
    return null;
  }
}
/**
 * Gets the application base directory.
 *
 * <p>The directory is the "." resource of the class loader that loaded this code, with a
 * trailing {@code /bin} or {@code /build} directory stripped. When the class loader has no such
 * resource, the lookup falls back to {@code getBaseDir(Class)}.
 *
 * @return the base directory, or {@code null} when it cannot be determined
 */
public static URL getBaseDir() {
  try {
    // An anonymous class gives access to a class defined by this code's own class loader.
    final Class<?> anchor =
        (new Object() {
              public String toString() {
                return super.toString();
              }
            })
            .getClass();

    /* We're using the classloader to determine the base URL. */
    final URL root = anchor.getClassLoader().getResource(".");
    if (root == null) {
      return getBaseDir(anchor);
    }

    final String protocol = root.getProtocol().toLowerCase();
    final String host = root.getHost().toLowerCase();
    String basePath = root.getFile();
    final boolean inOutputDir =
        basePath.endsWith("/bin")
            || basePath.endsWith("/bin/")
            || basePath.endsWith("/build")
            || basePath.endsWith("/build/");
    if (inOutputDir) {
      basePath = (new File(basePath)).getParent();
    }
    return new URL(protocol, host, basePath);
  } catch (Exception e) {
    e.printStackTrace();
    return null;
  }
}
/**
 * Processes the page behind {@code url} and, recursively, the entries that {@code processPage}
 * returns for it.
 *
 * <p>Entries starting with "RUN", "SAVE" or "LOAD" are not followed. All other entries are
 * turned into URLs on the same protocol, host and port and processed in turn. The page's input
 * stream is closed when processing finishes, whether normally or with an exception.
 *
 * @param url the page to process
 * @param baseDir the base directory passed on to {@code processPage}
 * @param status optional progress window; may be {@code null}
 * @return {@code false} if the URL had been processed before, {@code true} otherwise
 * @throws IOException if the page cannot be opened or read
 */
private boolean processURL(URL url, String baseDir, StatusWindow status) throws IOException {
  if (processedLinks.contains(url)) {
    return false;
  }
  processedLinks.add(url);

  URLConnection connection = url.openConnection();
  InputStream in = new BufferedInputStream(connection.getInputStream());
  try {
    ArrayList list = processPage(in, baseDir, url);
    if ((status != null) && (list.size() > 0)) {
      status.setMaximum(list.size());
    }
    for (int i = 0; i < list.size(); i++) {
      Object item = list.get(i);
      if (status != null) {
        status.setMessage(Utils.trimFileName(item.toString(), 40), i);
      }
      String link = (String) item;
      boolean isCommand =
          link.startsWith("RUN") || link.startsWith("SAVE") || link.startsWith("LOAD");
      if (!isCommand) {
        processURL(new URL(url.getProtocol(), url.getHost(), url.getPort(), link), baseDir, status);
      }
    }
  } finally {
    // Previously the stream stayed open whenever page processing or a nested call failed.
    in.close();
  }
  return true;
}