Java SiteMap.setProcessed примеры использования

Язык программирования: Java

Класс/Тип: SiteMap

Метод/Функция: setProcessed

Примеров на hotexamples.com: 3

Java SiteMap.setProcessed — найдено 3 примера. Это лучшие примеры кода на Java для SiteMap.setProcessed, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

setType(4)

setProcessed(3)

convertToDate(2)

addPage(1)

addSiteMapUrl(1)

createTree(1)

finish(1)

getBaseUrl(1)

getCount(1)

getFullDateFormat(1)

getRoot(1)

getSiteMaps(1)

setRoot(1)

Пример #1

Показать файл

Файл: SiteMapParser.java Проект: rclaude/crawler-commons

  /**
   * Builds a {@link SiteMap} from a syndication feed document.
   *
   * <p>The document is handled as Atom when it contains a <b>feed</b> element; otherwise it is
   * handled as RSS when it contains a <b>channel</b> element.
   *
   * @param sitemapUrl URL used to construct the returned sitemap and reported in the error message
   * @param doc XML document to inspect and parse
   * @return the sitemap, after the Atom or RSS parser has run and {@code setProcessed(true)} has
   *     been called on it
   * @throws UnknownFormatException if the document has neither a "feed" nor a "channel" element
   */
  private SiteMap parseSyndicationFormat(URL sitemapUrl, Document doc)
      throws UnknownFormatException {

    SiteMap feedSitemap = new SiteMap(sitemapUrl);

    // Atom: a "feed" element is present; its first occurrence is handed to the Atom parser.
    NodeList atomFeeds = doc.getElementsByTagName("feed");
    if (atomFeeds.getLength() > 0) {
      parseAtom(feedSitemap, (Element) atomFeeds.item(0), doc);
      feedSitemap.setProcessed(true);
      return feedSitemap;
    }

    // RSS: detected through a "channel" element rather than the outer tag, because the outer
    // tag may be <rss> or <rdf:RDF> while both contain a <channel>.
    // See https://github.com/crawler-commons/crawler-commons/issues/87
    // and also RSS 1.0 specification http://web.resource.org/rss/1.0/spec
    NodeList rssChannels = doc.getElementsByTagName("channel");
    if (rssChannels.getLength() == 0) {
      throw new UnknownFormatException("Unknown syndication format at " + sitemapUrl);
    }

    parseRSS(feedSitemap, doc);
    feedSitemap.setProcessed(true);
    return feedSitemap;
  }

Пример #2

Показать файл

Файл: SiteMapParser.java Проект: rclaude/crawler-commons

  /**
   * Builds a {@link SiteMap} of type {@code SitemapType.XML} from the {@code url} elements of an
   * XML sitemap document. Example of such a document:
   *
   * <pre>{@code
   * <?xml version="1.0" encoding="UTF-8"?>
   * <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   *   <url>
   *     <loc>http://www.example.com/</loc>
   *     <lastmod>2005-01-01</lastmod>
   *     <changefreq>monthly</changefreq>
   *     <priority>0.8</priority>
   *   </url>
   *   <url>
   *     <loc>http://www.example.com/catalog?item=12&amp;desc=vacation_hawaii</loc>
   *     <changefreq>weekly</changefreq>
   *   </url>
   * </urlset>
   * }</pre>
   *
   * @param sitemapUrl URL used to construct the returned sitemap
   * @param doc XML document whose {@code url} elements are read
   * @return the sitemap, after every {@code url} element has been handed to
   *     {@code addUrlIntoSitemap} and {@code setProcessed(true)} has been called
   */
  private SiteMap parseXmlSitemap(URL sitemapUrl, Document doc) {

    SiteMap xmlSitemap = new SiteMap(sitemapUrl);
    xmlSitemap.setType(SitemapType.XML);

    NodeList urlNodes = doc.getElementsByTagName("url");

    // Visit each <url> entry; the node's position in the list is passed along as its index.
    for (int position = 0; position < urlNodes.getLength(); position++) {
      Node candidate = urlNodes.item(position);
      if (candidate.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }

      Element urlElement = (Element) candidate;
      String lastModified = getElementValue(urlElement, "lastmod");
      String changeFrequency = getElementValue(urlElement, "changefreq");
      String priorityValue = getElementValue(urlElement, "priority");
      String location = getElementValue(urlElement, "loc");

      addUrlIntoSitemap(
          location, xmlSitemap, lastModified, changeFrequency, priorityValue, position);
    }

    xmlSitemap.setProcessed(true);
    return xmlSitemap;
  }

Пример #3

Показать файл

Файл: SiteMapParser.java Проект: rclaude/crawler-commons

  /**
   * Process a text-based Sitemap. Text sitemaps only list URLs but no priorities, last mods, etc.
   *
   * <p>The content is decoded as UTF-8 and read line by line through a {@code BOMInputStream}
   * (presumably so that a leading byte order mark does not end up in the first URL; confirm
   * against the commons-io version in use). Every non-empty line is passed to
   * {@code addUrlIntoSitemap} together with a 1-based counter. Reading stops once
   * {@code MAX_URLS} lines have been passed on.
   *
   * @param sitemapUrl URL used to construct the returned sitemap
   * @param content raw bytes of the text sitemap
   * @return the sitemap of type {@code SitemapType.TEXT}, after {@code setProcessed(true)} has
   *     been called on it
   * @throws IOException if reading or closing the underlying reader fails
   */
  private SiteMap processText(String sitemapUrl, byte[] content) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, "UTF-8"));
    try {
      String line;
      int i = 1;
      // The cap is checked before each read: once MAX_URLS lines have been accepted nothing
      // more can be added, so decoding the remainder of the content would be wasted work.
      while (i <= MAX_URLS && (line = reader.readLine()) != null) {
        if (line.length() > 0) {
          addUrlIntoSitemap(line, textSiteMap, null, null, null, i++);
        }
      }
    } finally {
      // Release the reader (and the wrapped streams) on both normal and exceptional exit.
      reader.close();
    }
    textSiteMap.setProcessed(true);

    return textSiteMap;
  }