// If there is a <title> element on the start page, use that as our AU
 // name.
 String recomputeRegName() {
   if (!isStarted()) {
     // This can get invoked (seveeral times, mostly from logging) before
     // enough mechanism has started to make it possible to resolve the CuUrl
     // below.
     return null;
   }
   try {
     CachedUrl cu = makeCachedUrl(m_registryUrl);
     if (cu == null) return null;
     URL cuUrl = CuUrl.fromCu(cu);
     Parser parser = new Parser(cuUrl.toString());
     NodeList nodelst = parser.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class));
     Node nodes[] = nodelst.toNodeArray();
     recomputeRegName = false;
     if (nodes.length < 1) return null;
     // Get the first title found
     TitleTag tag = (TitleTag) nodes[0];
     if (tag == null) return null;
     return tag.getTitle();
   } catch (MalformedURLException e) {
     log.warning("recomputeRegName", e);
     return null;
   } catch (ParserException e) {
     if (e.getThrowable() instanceof FileNotFoundException) {
       log.warning("recomputeRegName: " + e.getThrowable().toString());
     } else {
       log.warning("recomputeRegName", e);
     }
     return null;
   }
 }
Ejemplo n.º 2
0
  private void buildUrlSets(String url) {

    try {
      outputMessage("\nFetching " + url, TEST_SUMMARY_MESSAGE);
      URL srcUrl = new URL(url);
      //       URLConnection conn = srcUrl.openConnection();
      //       String type = conn.getContentType();
      //       type = conn.getHeaderField("content-type");
      //       InputStream istr = conn.getInputStream();

      LockssUrlConnection conn = UrlUtil.openConnection(url, connectionPool);
      if (proxyHost != null) {
        conn.setProxy(proxyHost, proxyPort);
      }
      if (userAgent != null) {
        conn.setRequestProperty("user-agent", userAgent);
      }
      try {
        conn.execute();
        int resp = conn.getResponseCode();
        if (resp != 200) {
          outputMessage("Resp: " + resp + ": " + conn.getResponseMessage(), TEST_SUMMARY_MESSAGE);
          return;
        }
        depth_fetched[m_curDepth - 1]++;
        String cookies = conn.getResponseHeaderValue("Set-Cookie");
        if (cookies != null) {
          outputMessage("Cookies: " + cookies, PLAIN_MESSAGE);
        }
        String type = conn.getResponseContentType();
        if (type == null || !type.toLowerCase().startsWith("text/html")) {
          outputMessage("Type: " + type + ", not parsing", URL_SUMMARY_MESSAGE);
          return;
        }
        outputMessage("Type: " + type + ", extracting Urls", URL_SUMMARY_MESSAGE);
        InputStream istr = conn.getResponseInputStream();
        InputStreamReader reader = new InputStreamReader(istr);
        //       MyMockCachedUrl mcu = new MyMockCachedUrl(srcUrl.toString(), reader);
        GoslingHtmlLinkExtractor extractor = new GoslingHtmlLinkExtractor();
        extractor.extractUrls(null, istr, null, srcUrl.toString(), new MyLinkExtractorCallback());
        istr.close();
        depth_parsed[m_curDepth - 1]++;
      } finally {
        conn.release();
      }
    } catch (MalformedURLException murle) {
      murle.printStackTrace();
      outputErrResults(url, "Malformed URL:" + murle.getMessage());
    } catch (IOException ex) {
      ex.printStackTrace();
      outputErrResults(url, "IOException: " + ex.getMessage());
    }
  }
Ejemplo n.º 3
0
 private ExternalizableMap loadMap(String extMapName, ClassLoader loader)
     throws FileNotFoundException {
   String first = null;
   String next = extMapName;
   List<String> urls = new ArrayList<String>();
   ExternalizableMap res = null;
   while (next != null) {
     // convert the plugin class name to an xml file name
     String mapFile = next.replace('.', '/') + MAP_SUFFIX;
     URL url = loader.getResource(mapFile);
     if (url != null && urls.contains(url.toString())) {
       throw new PluginException.InvalidDefinition("Plugin inheritance loop: " + next);
     }
     // load into map
     ExternalizableMap oneMap = new ExternalizableMap();
     oneMap.loadMapFromResource(mapFile, loader);
     urls.add(url.toString());
     // apply overrides one plugin at a time in inheritance chain
     processOverrides(oneMap);
     if (res == null) {
       res = oneMap;
     } else {
       for (Map.Entry ent : oneMap.entrySet()) {
         String key = (String) ent.getKey();
         Object val = ent.getValue();
         if (!res.containsKey(key)) {
           res.setMapElement(key, val);
         }
       }
     }
     if (oneMap.containsKey(KEY_PLUGIN_PARENT)) {
       next = oneMap.getString(KEY_PLUGIN_PARENT);
     } else {
       next = null;
     }
   }
   loadedFromUrls = urls;
   return res;
 }