Example #1
0
  public void CrawlRT(String RTPage) throws IOException {
    ArrayList<String> t = new ArrayList<String>();
    String crawlData;
    String crawlData2;
    String crawlData3;

    FileReader freader = new FileReader("Crawl.txt");
    BufferedReader br = new BufferedReader(freader);
    FileReader freader2 = new FileReader("Tocrawl.txt");
    BufferedReader br2 = new BufferedReader(freader2);
    FileWriter fwriter2 = new FileWriter("Tocrawl.txt", true);
    BufferedWriter bw2 = new BufferedWriter(fwriter2);
    FileWriter fwriter = new FileWriter("Crawl.txt", true);
    BufferedWriter bw = new BufferedWriter(fwriter);

    /*while(null != (crawlData2 = br.readLine()))
    {
    	if(crawlData2 !=null)
    		Crawl.add(crawlData2);
    }
    t = collectLinks(RTPage);
    Iterator<String> e3= t.iterator();
    while(e3.hasNext())
    {
    	String ee = e3.next();

    		if(!Crawl.contains(ee))
    		{
    			bw2.write(ee+"\r\n");
    		}



    }
    br.close();
    br2.close();
    bw.close();
    bw2.close();*/

    if (null == (crawlData = br.readLine()))
    // if(true)
    {
      // initial iteration
      bw.write(RTPage + "\r\n");
      Crawl.add(RTPage);
      t = collectLinks(RTPage);
      ToCrawl.addAll(t);
    } else {
      // collect data from files and load to array lists
      while (null != (crawlData2 = br.readLine())) {
        if (crawlData2 != null) Crawl.add(crawlData2);
      }

      while (null != (crawlData3 = br2.readLine())) {
        if (crawlData3 != null) ToCrawl.add(crawlData3);
      }
    }
    System.out.println("Crawlled");

    // Number of movies to be crawled
    for (int i = 0; i < 1000; i++) {
      if (ToCrawl.size() > 0) {
        Crawl.removeAll(Collections.singleton(null));
        ToCrawl.removeAll(Collections.singleton(null));
        String c = ToCrawl.get(0);
        if (Crawl.contains(c)) ToCrawl.remove(c);
        else {
          // collect links and collect data from a particular link
          Crawl.add(c);
          t = collectLinks(c);
          CollectData(c);
          ToCrawl.remove(c);
          Iterator<String> e3 = t.iterator();
          while (e3.hasNext()) {
            String ee = e3.next();
            if (!ToCrawl.contains(ee)) {
              if (!Crawl.contains(ee)) {
                ToCrawl.add(ee);
              }
            }
          }
          bw.write(c + "\r\n");
        }
      }
    }

    System.out.println("To Be Crawlled");
    Iterator<String> e2 = ToCrawl.iterator();
    while (e2.hasNext()) {
      // write to file the movies still to be crawled.
      bw2.write(e2.next() + "\r\n");
    }

    prop.setProperty("Id", Integer.toString(n));
    prop.store(new FileOutputStream("config.properties"), null);
    br.close();
    br2.close();
    bw.close();
    bw2.close();
  }
 /**
  * Unmarshall a Chromosome instance from a given XML Element representation.
  *
  * @param a_activeConfiguration current Configuration object
  * @param a_xmlElement the XML Element representation of the Chromosome
  * @return a new Chromosome instance setup with the data from the XML Element representation
  * @throws ImproperXMLException if the given Element is improperly structured or missing data
  * @throws UnsupportedRepresentationException if the actively configured Gene implementation does
  *     not support the string representation of the alleles used in the given XML document
  * @throws GeneCreationException if there is a problem creating or populating a Gene instance
  * @author Neil Rotstan
  * @since 1.0
  */
 public static Gene[] getGenesFromElement(
     Configuration a_activeConfiguration, Element a_xmlElement)
     throws ImproperXMLException, UnsupportedRepresentationException, GeneCreationException {
   // Do some sanity checking. Make sure the XML Element isn't null and
   // that it in fact represents a set of genes.
   // -----------------------------------------------------------------
   if (a_xmlElement == null || !(a_xmlElement.getTagName().equals(GENES_TAG))) {
     throw new ImproperXMLException(
         "Unable to build Chromosome instance from XML Element: "
             + "given Element is not a 'genes' element.");
   }
   List genes = Collections.synchronizedList(new ArrayList());
   // Extract the nested gene elements.
   // ---------------------------------
   NodeList geneElements = a_xmlElement.getElementsByTagName(GENE_TAG);
   if (geneElements == null) {
     throw new ImproperXMLException(
         "Unable to build Gene instances from XML Element: "
             + "'"
             + GENE_TAG
             + "'"
             + " sub-elements not found.");
   }
   // For each gene, get the class attribute so we know what class
   // to instantiate to represent the gene instance, and then find
   // the child text node, which is where the string representation
   // of the allele is located, and extract the representation.
   // -------------------------------------------------------------
   int numberOfGeneNodes = geneElements.getLength();
   for (int i = 0; i < numberOfGeneNodes; i++) {
     Element thisGeneElement = (Element) geneElements.item(i);
     thisGeneElement.normalize();
     // Fetch the class attribute and create an instance of that
     // class to represent the current gene.
     // --------------------------------------------------------
     String geneClassName = thisGeneElement.getAttribute(CLASS_ATTRIBUTE);
     Gene thisGeneObject;
     Class geneClass = null;
     try {
       geneClass = Class.forName(geneClassName);
       try {
         Constructor constr = geneClass.getConstructor(new Class[] {Configuration.class});
         thisGeneObject = (Gene) constr.newInstance(new Object[] {a_activeConfiguration});
       } catch (NoSuchMethodException nsme) {
         // Try it by calling method newGeneInternal.
         // -----------------------------------------
         Constructor constr = geneClass.getConstructor(new Class[] {});
         thisGeneObject = (Gene) constr.newInstance(new Object[] {});
         thisGeneObject =
             (Gene)
                 PrivateAccessor.invoke(
                     thisGeneObject, "newGeneInternal", new Class[] {}, new Object[] {});
       }
     } catch (Throwable e) {
       throw new GeneCreationException(geneClass, e);
     }
     // Find the text node and fetch the string representation of
     // the allele.
     // ---------------------------------------------------------
     NodeList children = thisGeneElement.getChildNodes();
     int childrenSize = children.getLength();
     String alleleRepresentation = null;
     for (int j = 0; j < childrenSize; j++) {
       Element alleleElem = (Element) children.item(j);
       if (alleleElem.getTagName().equals(ALLELE_TAG)) {
         alleleRepresentation = alleleElem.getAttribute("value");
       }
       if (children.item(j).getNodeType() == Node.TEXT_NODE) {
         // We found the text node. Extract the representation.
         // ---------------------------------------------------
         alleleRepresentation = children.item(j).getNodeValue();
         break;
       }
     }
     // Sanity check: Make sure the representation isn't null.
     // ------------------------------------------------------
     if (alleleRepresentation == null) {
       throw new ImproperXMLException(
           "Unable to build Gene instance from XML Element: "
               + "value (allele) is missing representation.");
     }
     // Now set the value of the gene to that reflect the
     // string representation.
     // -------------------------------------------------
     try {
       thisGeneObject.setValueFromPersistentRepresentation(alleleRepresentation);
     } catch (UnsupportedOperationException e) {
       throw new GeneCreationException(
           "Unable to build Gene because it does not support the "
               + "setValueFromPersistentRepresentation() method.");
     }
     // Finally, add the current gene object to the list of genes.
     // ----------------------------------------------------------
     genes.add(thisGeneObject);
   }
   return (Gene[]) genes.toArray(new Gene[genes.size()]);
 }