public void CrawlRT(String RTPage) throws IOException { ArrayList<String> t = new ArrayList<String>(); String crawlData; String crawlData2; String crawlData3; FileReader freader = new FileReader("Crawl.txt"); BufferedReader br = new BufferedReader(freader); FileReader freader2 = new FileReader("Tocrawl.txt"); BufferedReader br2 = new BufferedReader(freader2); FileWriter fwriter2 = new FileWriter("Tocrawl.txt", true); BufferedWriter bw2 = new BufferedWriter(fwriter2); FileWriter fwriter = new FileWriter("Crawl.txt", true); BufferedWriter bw = new BufferedWriter(fwriter); /*while(null != (crawlData2 = br.readLine())) { if(crawlData2 !=null) Crawl.add(crawlData2); } t = collectLinks(RTPage); Iterator<String> e3= t.iterator(); while(e3.hasNext()) { String ee = e3.next(); if(!Crawl.contains(ee)) { bw2.write(ee+"\r\n"); } } br.close(); br2.close(); bw.close(); bw2.close();*/ if (null == (crawlData = br.readLine())) // if(true) { // initial iteration bw.write(RTPage + "\r\n"); Crawl.add(RTPage); t = collectLinks(RTPage); ToCrawl.addAll(t); } else { // collect data from files and load to array lists while (null != (crawlData2 = br.readLine())) { if (crawlData2 != null) Crawl.add(crawlData2); } while (null != (crawlData3 = br2.readLine())) { if (crawlData3 != null) ToCrawl.add(crawlData3); } } System.out.println("Crawlled"); // Number of movies to be crawled for (int i = 0; i < 1000; i++) { if (ToCrawl.size() > 0) { Crawl.removeAll(Collections.singleton(null)); ToCrawl.removeAll(Collections.singleton(null)); String c = ToCrawl.get(0); if (Crawl.contains(c)) ToCrawl.remove(c); else { // collect links and collect data from a particular link Crawl.add(c); t = collectLinks(c); CollectData(c); ToCrawl.remove(c); Iterator<String> e3 = t.iterator(); while (e3.hasNext()) { String ee = e3.next(); if (!ToCrawl.contains(ee)) { if (!Crawl.contains(ee)) { ToCrawl.add(ee); } } } bw.write(c + "\r\n"); } } } System.out.println("To Be Crawlled"); Iterator<String> e2 = ToCrawl.iterator(); while (e2.hasNext()) { // write to file the movies still to be crawled. bw2.write(e2.next() + "\r\n"); } prop.setProperty("Id", Integer.toString(n)); prop.store(new FileOutputStream("config.properties"), null); br.close(); br2.close(); bw.close(); bw2.close(); }
/** * Unmarshall a Chromosome instance from a given XML Element representation. * * @param a_activeConfiguration current Configuration object * @param a_xmlElement the XML Element representation of the Chromosome * @return a new Chromosome instance setup with the data from the XML Element representation * @throws ImproperXMLException if the given Element is improperly structured or missing data * @throws UnsupportedRepresentationException if the actively configured Gene implementation does * not support the string representation of the alleles used in the given XML document * @throws GeneCreationException if there is a problem creating or populating a Gene instance * @author Neil Rotstan * @since 1.0 */ public static Gene[] getGenesFromElement( Configuration a_activeConfiguration, Element a_xmlElement) throws ImproperXMLException, UnsupportedRepresentationException, GeneCreationException { // Do some sanity checking. Make sure the XML Element isn't null and // that it in fact represents a set of genes. // ----------------------------------------------------------------- if (a_xmlElement == null || !(a_xmlElement.getTagName().equals(GENES_TAG))) { throw new ImproperXMLException( "Unable to build Chromosome instance from XML Element: " + "given Element is not a 'genes' element."); } List genes = Collections.synchronizedList(new ArrayList()); // Extract the nested gene elements. // --------------------------------- NodeList geneElements = a_xmlElement.getElementsByTagName(GENE_TAG); if (geneElements == null) { throw new ImproperXMLException( "Unable to build Gene instances from XML Element: " + "'" + GENE_TAG + "'" + " sub-elements not found."); } // For each gene, get the class attribute so we know what class // to instantiate to represent the gene instance, and then find // the child text node, which is where the string representation // of the allele is located, and extract the representation. // ------------------------------------------------------------- int numberOfGeneNodes = geneElements.getLength(); for (int i = 0; i < numberOfGeneNodes; i++) { Element thisGeneElement = (Element) geneElements.item(i); thisGeneElement.normalize(); // Fetch the class attribute and create an instance of that // class to represent the current gene. // -------------------------------------------------------- String geneClassName = thisGeneElement.getAttribute(CLASS_ATTRIBUTE); Gene thisGeneObject; Class geneClass = null; try { geneClass = Class.forName(geneClassName); try { Constructor constr = geneClass.getConstructor(new Class[] {Configuration.class}); thisGeneObject = (Gene) constr.newInstance(new Object[] {a_activeConfiguration}); } catch (NoSuchMethodException nsme) { // Try it by calling method newGeneInternal. // ----------------------------------------- Constructor constr = geneClass.getConstructor(new Class[] {}); thisGeneObject = (Gene) constr.newInstance(new Object[] {}); thisGeneObject = (Gene) PrivateAccessor.invoke( thisGeneObject, "newGeneInternal", new Class[] {}, new Object[] {}); } } catch (Throwable e) { throw new GeneCreationException(geneClass, e); } // Find the text node and fetch the string representation of // the allele. // --------------------------------------------------------- NodeList children = thisGeneElement.getChildNodes(); int childrenSize = children.getLength(); String alleleRepresentation = null; for (int j = 0; j < childrenSize; j++) { Element alleleElem = (Element) children.item(j); if (alleleElem.getTagName().equals(ALLELE_TAG)) { alleleRepresentation = alleleElem.getAttribute("value"); } if (children.item(j).getNodeType() == Node.TEXT_NODE) { // We found the text node. Extract the representation. // --------------------------------------------------- alleleRepresentation = children.item(j).getNodeValue(); break; } } // Sanity check: Make sure the representation isn't null. // ------------------------------------------------------ if (alleleRepresentation == null) { throw new ImproperXMLException( "Unable to build Gene instance from XML Element: " + "value (allele) is missing representation."); } // Now set the value of the gene to that reflect the // string representation. // ------------------------------------------------- try { thisGeneObject.setValueFromPersistentRepresentation(alleleRepresentation); } catch (UnsupportedOperationException e) { throw new GeneCreationException( "Unable to build Gene because it does not support the " + "setValueFromPersistentRepresentation() method."); } // Finally, add the current gene object to the list of genes. // ---------------------------------------------------------- genes.add(thisGeneObject); } return (Gene[]) genes.toArray(new Gene[genes.size()]); }