Java CachedUrl.getUnfilteredInputStream 예제들

프로그래밍 언어: Java

클래스/타입: CachedUrl

메소드/함수: getUnfilteredInputStream

hotexamples.com에서의 예제들: 3

Java CachedUrl.getUnfilteredInputStream - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 CachedUrl.getUnfilteredInputStream에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

getUrl(13)

getArchivalUnit(5)

hasContent(4)

getContentType(3)

getUnfilteredInputStream(3)

release(3)

getProperties(2)

getContentSize(1)

getCuVersions(1)

openForHashing(1)

예제 #1

파일 보기

파일: ArcExporter.java 프로젝트: fkautz/cmusv-practicum

 protected void writeCu(CachedUrl cu) throws IOException {
   String url = cu.getUrl();
   long contentSize = cu.getContentSize();
   CIProperties props = cu.getProperties();
   long fetchTime = Long.parseLong(props.getProperty(CachedUrl.PROPERTY_FETCH_TIME));
   InputStream contentIn = cu.getUnfilteredInputStream();
   try {
     if (isResponse) {
       String hdrString = getHttpResponseString(cu);
       long size = contentSize + hdrString.length();
       InputStream headerIn = new ReaderInputStream(new StringReader(hdrString));
       InputStream concat = new SequenceInputStream(headerIn, contentIn);
       try {
         aw.write(xlateFilename(url), cu.getContentType(), getHostIp(), fetchTime, size, concat);
       } finally {
         IOUtil.safeClose(concat);
       }
     } else {
       aw.write(
           xlateFilename(url),
           cu.getContentType(),
           getHostIp(),
           fetchTime,
           cu.getContentSize(),
           contentIn);
     }
   } finally {
     AuUtil.safeRelease(cu);
   }
 }

예제 #2

파일 보기

파일: FuncSimulatedContent.java 프로젝트: Mvrooman/lockss-daemon

 private String getUrlContent(CachedUrl url) throws IOException {
   InputStream content = url.getUnfilteredInputStream();
   ByteArrayOutputStream baos = new ByteArrayOutputStream();
   StreamUtil.copy(content, baos);
   content.close();
   String contentStr = new String(baos.toByteArray());
   baos.close();
   return contentStr;
 }

예제 #3

파일 보기

파일: MaffeyArticleIteratorFactory.java 프로젝트: edina/lockss-daemon

    /*
     * In order to find full text PDF you need to find the citation_pdf_url meta tag in the
     * abstract html pull out the pdf url normalize it (reorder params...) and find the matching
     * cached URL
     */
    protected ArticleFiles processAbstract(CachedUrl absCu, Matcher absMat) {
      NodeList nl = null;
      ArticleFiles af = new ArticleFiles();
      if (absCu != null && absCu.hasContent()) {
        // TEMPORARY: set absCU as default full text CU in case there is
        // no PDF CU with content; the current metadata manager currently
        // uses only the full text CU, but this will change with the new
        // metadata schema that can have multiple CUs for an article.
        af.setFullTextCu(absCu);
        af.setRoleCu(ArticleFiles.ROLE_ABSTRACT, absCu);
        try {
          InputStreamSource is =
              new InputStreamSource(new Stream(absCu.getUnfilteredInputStream()));
          Page pg = new Page(is);
          Lexer lx = new Lexer(pg);
          Parser parser = new Parser(lx);
          Lexer.STRICT_REMARKS = false;
          NodeFilter nf =
              new NodeFilter() {
                public boolean accept(Node node) {
                  if (!(node instanceof MetaTag)) return false;
                  MetaTag meta = (MetaTag) node;
                  if (!"citation_pdf_url".equalsIgnoreCase(meta.getMetaTagName())) return false;
                  return true;
                }
              };
          nl = parser.extractAllNodesThatMatch(nf);
        } catch (ParserException e) {
          log.debug("Unable to parse abstract page html", e);
        } catch (UnsupportedEncodingException e) {
          log.debug("Bad encoding in abstact page html", e);
        } finally {
          absCu.release();
        }
      }
      try {
        if (nl != null) {
          if (nl.size() > 0) {
            // minimally encode URL to prevent URL constructor
            // from stripping trailing spaces
            String pdfUrlStr = ((MetaTag) nl.elementAt(0)).getMetaContent();
            URL pdfUrl = new URL(UrlUtil.minimallyEncodeUrl(pdfUrlStr));
            List<String> paramList = new ArrayList<String>();
            paramList.add("fileType");
            paramList.add("fileId");
            paramList.add("fileName");
            pdfUrl = reArrangeUrlParams(pdfUrl, paramList);

            if (!pdfUrl.getHost().startsWith("www.")) {
              pdfUrl = new URL(pdfUrl.getProtocol(), "www." + pdfUrl.getHost(), pdfUrl.getFile());
            }

            // note: must leave URL encoded because that's how we store URLs
            CachedUrl pdfCu = au.makeCachedUrl(pdfUrl.toString());
            if (pdfCu != null && pdfCu.hasContent()) {
              // replace absCU with pdfCU if exists and has content
              af.setFullTextCu(pdfCu);
              af.setRoleCu(ArticleFiles.ROLE_FULL_TEXT_PDF, pdfCu);
            }
          }
        }
      } catch (MalformedURLException e) {
        log.debug("Badly formatted pdf url link", e);
      } catch (IllegalArgumentException e) {
        log.debug("Badly formatted pdf url link", e);
      }

      return af;
    }