Ejemplo n.º 1
0
 /**
  * Downloads the content addressed by the first task argument and echoes it to standard
  * output before handing it back.
  *
  * @param params task arguments; only {@code params[0]} is read and it is passed unchanged
  *     to {@code HttpDownloader.download}
  * @return whatever the downloader returned for {@code params[0]}
  */
 @Override
 public String doInBackground(String... params) {
   // The downloader is created first and the argument read second, as a single call chain.
   final String xml = new HttpDownloader().download(params[0]);
   System.out.println("xml------>" + xml);
   return xml;
 }
Ejemplo n.º 2
0
  /**
   * Looks up {@code danci} through the {@code dict.youdao.com/m/search} page and stores the
   * downloaded text, with the provider's brand names substituted, in {@code meaning}.
   *
   * <p>The word is URL-encoded as UTF-8 before it is placed in the query string, so words that
   * contain spaces, non-ASCII characters, or reserved characters such as {@code &} and
   * {@code #} no longer corrupt the request URL.
   *
   * @param danci the word to look up, in raw (not yet URL-encoded) form
   */
  public WordUrl(String danci) {
    String encodedWord;
    try {
      // String.valueOf keeps the previous handling of null (rendered as the text "null").
      encodedWord = java.net.URLEncoder.encode(String.valueOf(danci), "UTF-8");
    } catch (java.io.UnsupportedEncodingException e) {
      // UTF-8 is a charset every Java platform must provide, so this is not expected to occur.
      throw new IllegalStateException("UTF-8 encoding is not supported", e);
    }

    HttpDownloader httpDownloader = new HttpDownloader();

    // NOTE(review): the meaning of the first (33) and last (-1) arguments is not visible from
    // this constructor; they are passed through unchanged.
    String lrc =
        httpDownloader.download(
            33,
            "http://dict.youdao.com/m/search?q="
                + encodedWord
                + "&keyfrom=smartresult.dict.m#hanhan",
            -1);

    // All three substitutions are plain literals, so String.replace is used instead of the
    // regex-based replaceAll; the result is the same for these inputs.
    lrc = lrc.replace("有道", "易记");
    lrc = lrc.replace("youdao", "yiji");
    lrc = lrc.replace("网易公司", "周卓潜出品");
    meaning = lrc;
  }
Ejemplo n.º 3
0
 /**
  * Downloads the archive named by the rule's {@code url} attribute into the rule's output
  * directory and decompresses it there.
  *
  * @param env environment handed to {@code createDirectory} when the output directory is set up
  * @param rule rule supplying the name, target kind, location and the {@code url} and
  *     {@code sha256} attributes
  * @return a {@code RepositoryValue} built from the decompressed directory and the directory
  *     value, or {@code null} when {@code createDirectory} returned {@code null}
  * @throws RepositoryFunctionException with {@code Transience.PERSISTENT} when the {@code url}
  *     attribute cannot be parsed, or with {@code Transience.TRANSIENT} when the download or
  *     the decompression fails
  */
 protected SkyValue compute(Environment env, Rule rule) throws RepositoryFunctionException {
   // The output directory is always under .external-repository (to stay out of the way of
   // artifacts from this repository) and uses the rule's name to avoid conflicts with other
   // remote repository rules. For example, suppose you had the following WORKSPACE file:
   //
   // http_archive(name = "png", url = "http://example.com/downloads/png.tar.gz", sha256 = "...")
   //
   // This would download png.tar.gz to .external-repository/png/png.tar.gz.
   Path outputDirectory = getExternalRepositoryDirectory().getRelative(rule.getName());
   FileValue directoryValue = createDirectory(outputDirectory, env);
   if (directoryValue == null) {
     return null;
   }
   AggregatingAttributeMapper mapper = AggregatingAttributeMapper.of(rule);
   URL url;
   try {
     url = new URL(mapper.get("url", Type.STRING));
   } catch (MalformedURLException e) {
     throw new RepositoryFunctionException(
         new EvalException(rule.getLocation(), "Error parsing URL: " + e.getMessage()),
         Transience.PERSISTENT);
   }
   String sha256 = mapper.get("sha256", Type.STRING);
   HttpDownloader downloader = new HttpDownloader(url, sha256, outputDirectory);
   try {
     Path archiveFile = downloader.download();
     outputDirectory =
         DecompressorFactory.create(
                 rule.getTargetKind(), rule.getName(), archiveFile, outputDirectory)
             .decompress();
   } catch (IOException e) {
     // Assumes all IO errors transient.
     throw new RepositoryFunctionException(e, Transience.TRANSIENT);
   } catch (DecompressorException e) {
     // Wrap as IOException for the exception constructor, but keep the original exception as
     // the cause so its stack trace is not lost.
     throw new RepositoryFunctionException(
         new IOException(e.getMessage(), e), Transience.TRANSIENT);
   }
   return new RepositoryValue(outputDirectory, directoryValue);
 }
Ejemplo n.º 4
0
  /**
   * Fetches the document behind {@code urlItem}, either from the crawl cache or through the
   * given downloader, copies the response metadata onto {@code urlItem}, and parses the content
   * when the HTTP status is in the 2xx range.
   *
   * <p>The whole operation runs while holding the monitor of {@code this}. Failures are not
   * propagated: every exception is caught, logged, and translated into a fetch or parser status
   * on {@code urlItem} plus an error message passed to {@code setError}.
   *
   * @param httpDownloader downloader used to issue the request when the crawl cache has no
   *     entry for the URL
   * @return the download item that was loaded from the cache or fetched; {@code null} if an
   *     exception occurred before one was obtained
   */
  public DownloadItem download(HttpDownloader httpDownloader) {
    synchronized (this) {
      InputStream is = null;
      DownloadItem downloadItem = null;
      try {
        URL url = urlItem.getURL();
        // A missing URL is reported as MalformedURLException, handled below as URL_ERROR.
        if (url == null) throw new MalformedURLException("Malformed URL: " + urlItem.getUrl());
        // URL normalisation: round-trip through URI (a URISyntaxException is handled below).
        URI uri = url.toURI();
        url = uri.toURL();

        credentialItem = credentialManager == null ? null : credentialManager.matchCredential(url);

        String externalFormUrl = url.toExternalForm();
        // Try the crawl cache first; a null result means there is no cached entry.
        downloadItem = crawlCacheManager.loadCache(uri);

        boolean fromCache = (downloadItem != null);

        if (!fromCache) {

          // Cache miss: issue the request with the cookies and headers registered for this URL.
          List<CookieItem> cookieList = cookieManager.getItems(externalFormUrl);
          List<HeaderItem> headerList = headerManager.getItems(externalFormUrl);
          downloadItem = httpDownloader.get(uri, credentialItem, headerList, cookieList);
        } else if (Logging.isDebug) Logging.debug("Crawl cache deliver: " + uri);

        // Copy the response metadata onto the URL item.
        // NOTE(review): if httpDownloader.get can return null, the calls below raise a
        // NullPointerException, which ends up in the generic Exception handler — confirm.
        urlItem.setContentDispositionFilename(downloadItem.getContentDispositionFilename());

        urlItem.setContentBaseType(downloadItem.getContentBaseType());

        urlItem.setContentTypeCharset(downloadItem.getContentTypeCharset());

        urlItem.setContentEncoding(downloadItem.getContentEncoding());

        urlItem.setContentLength(downloadItem.getContentLength());

        urlItem.setLastModifiedDate(downloadItem.getLastModified());

        // Provisional status; the status-code branches below may replace it.
        urlItem.setFetchStatus(FetchStatus.FETCHED);

        urlItem.setHeaders(downloadItem.getHeaders());

        Integer code = downloadItem.getStatusCode();
        // Handled by the IOException handler below (fetch status ERROR).
        if (code == null) throw new IOException("Http status is null");

        urlItem.setResponseCode(code);
        redirectUrlLocation = downloadItem.getRedirectLocation();
        if (redirectUrlLocation != null)
          urlItem.setRedirectionUrl(redirectUrlLocation.toURL().toExternalForm());

        urlItem.setBacklinkCount(config.getUrlManager().countBackLinks(urlItem.getUrl()));

        // Map the HTTP status code to a fetch status. Codes outside the ranges tested here
        // (below 200, or 600 and above) keep the provisional FETCHED status set above.
        if (code >= 200 && code < 300) {
          // A fresh download is written to the crawl cache, which hands back the stream to
          // parse; a cached item provides its content stream directly.
          if (!fromCache) is = crawlCacheManager.storeCache(downloadItem);
          else is = downloadItem.getContentInputStream();
          parseContent(is);
        } else if (code == 301) {
          urlItem.setFetchStatus(FetchStatus.REDIR_PERM);
        } else if (code > 301 && code < 400) {
          urlItem.setFetchStatus(FetchStatus.REDIR_TEMP);
        } else if (code >= 400 && code < 500) {
          urlItem.setFetchStatus(FetchStatus.GONE);
        } else if (code >= 500 && code < 600) {
          urlItem.setFetchStatus(FetchStatus.HTTP_ERROR);
        }
      } catch (FileNotFoundException e) {
        // Must precede the IOException handler: FileNotFoundException is an IOException.
        Logging.info("FileNotFound: " + urlItem.getUrl());
        urlItem.setFetchStatus(FetchStatus.GONE);
        setError("FileNotFound: " + urlItem.getUrl());
      } catch (LimitException e) {
        Logging.warn(e.toString() + " (" + urlItem.getUrl() + ")");
        urlItem.setFetchStatus(FetchStatus.SIZE_EXCEED);
        setError(e.getMessage());
      } catch (InstantiationException e) {
        // The three reflection failures below are recorded as a parser error rather than a
        // fetch error.
        Logging.error(e.getMessage(), e);
        urlItem.setParserStatus(ParserStatus.PARSER_ERROR);
        setError(e.getMessage());
      } catch (IllegalAccessException e) {
        Logging.error(e.getMessage(), e);
        urlItem.setParserStatus(ParserStatus.PARSER_ERROR);
        setError(e.getMessage());
      } catch (ClassNotFoundException e) {
        Logging.error(e.getMessage(), e);
        urlItem.setParserStatus(ParserStatus.PARSER_ERROR);
        setError(e.getMessage());
      } catch (URISyntaxException e) {
        Logging.warn(e.getMessage(), e);
        urlItem.setFetchStatus(FetchStatus.URL_ERROR);
        setError(e.getMessage());
      } catch (MalformedURLException e) {
        // Must precede the IOException handler: MalformedURLException is an IOException.
        Logging.warn(e.getMessage(), e);
        urlItem.setFetchStatus(FetchStatus.URL_ERROR);
        setError(e.getMessage());
      } catch (IOException e) {
        Logging.error(e.getMessage(), e);
        urlItem.setFetchStatus(FetchStatus.ERROR);
        setError(e.getMessage());
      } catch (IllegalArgumentException e) {
        Logging.error(e.getMessage(), e);
        urlItem.setFetchStatus(FetchStatus.ERROR);
        setError(e.getMessage());
      } catch (Exception e) {
        // Catch-all so that no exception escapes this method.
        Logging.error(e.getMessage(), e);
        urlItem.setFetchStatus(FetchStatus.ERROR);
        setError(e.getMessage());
      } finally {
        // is remains null unless a 2xx response was reached.
        IOUtils.close(is);
      }
      return downloadItem;
    }
  }
Ejemplo n.º 5
0
 /**
  * Downloads {@code resources.xml} from the location formed by appending that file name to
  * {@code AppConstant.URL.BASE_URL}.
  *
  * @return whatever {@code HttpDownloader.download} returned for that address
  */
 private String downloadXML() {
   return new HttpDownloader().download(AppConstant.URL.BASE_URL + "resources.xml");
 }