/** * Given a graph database gDB, first prune distinct edges with the edgeIndex Then randomly * selected chooseN graphs * * @param gDB * @param chooseN * @param pruneDistinctEdges * @param edgeIndex * @return * @throws IOException * @throws ParseException */ public static Graph[] randomlyChooseDBGraph( GraphDatabase_OnDisk gDB, int chooseN, boolean pruneDistinctEdges, EdgeIndex edgeIndex) throws IOException, ParseException { if (pruneDistinctEdges == false) return randomlyChooseDBGraph(gDB, chooseN); // First Step: filter out graphs that are not in between the boundary String tempFileName = gDB.getDBFileName() + "_temp"; BufferedWriter tempDBWriter = new BufferedWriter(new FileWriter(tempFileName)); int count = 0; float edgeNum = 0, nodeNum = 0; for (int i = 0; i < gDB.getTotalNum(); i++) { Graph theGraph = gDB.findGraph(i); if (edgeIndex.containInfrequentEdges(theGraph)) continue; else { tempDBWriter.write(count++ + " => " + gDB.findGraphString(i) + "\n"); edgeNum += theGraph.getEdgeCount(); nodeNum += theGraph.getNodeCount(); } } tempDBWriter.close(); // Intrigue java garbage collector Runtime r = Runtime.getRuntime(); r.gc(); // Write the meta information of the new file BufferedWriter metaWriter = new BufferedWriter(new FileWriter(tempFileName + "_Meta")); // 1. Processing Date SimpleDateFormat bartDateFormat = new SimpleDateFormat("EEEE-MMMM-dd-yyyy"); Date date = new Date(); metaWriter.write(bartDateFormat.format(date)); metaWriter.newLine(); // 2. Number of graphs in this file metaWriter.write("Number of Graphs:" + count); metaWriter.newLine(); metaWriter.write( "Average EdgeNum: " + (float) (edgeNum) / count + ", Average NodeNum: " + (float) (nodeNum) / count); // Close meta data file try { metaWriter.close(); } catch (IOException e) { e.printStackTrace(); } return randomlyChooseDBGraph(new GraphDatabase_OnDisk(tempFileName, gDB.getParser()), chooseN); }
@Override public Document vectorize(int gID, Graph g, Map<Integer, String> gHash) throws ParseException { Document gDoc = new Document(); if (g.getEdgeCount() == 0) return gDoc; String graphString = gParser.serialize(g); Field stringField = new Field("gString", graphString, Field.Store.YES, Field.Index.NO); gDoc.add(stringField); Field IDField = new Field("gID", new Integer(gID).toString(), Field.Store.YES, Field.Index.NO); gDoc.add(IDField); List<Integer> allIDs = searcher.subgraphs(g, new SearchStatus()); // 0. Add One "-1" to the subGraphs fields [for pure mustNot search] gDoc.add( new Field( "subGraphs", (new Integer(-1)).toString(), Field.Store.NO, Field.Index.NOT_ANALYZED)); if (allIDs == null || allIDs.size() == 0) return gDoc; // 1. Collections.sort(allIDs); for (int i = 0; i < allIDs.size(); i++) { if (gHash == null) { String byteString = allIDs.get(i).toString(); gDoc.add(new Field("subGraphs", byteString, Field.Store.NO, Field.Index.NOT_ANALYZED)); } else { gDoc.add( new Field( "subGraphs", gHash.get(allIDs.get(i)), Field.Store.NO, Field.Index.NOT_ANALYZED)); } } // StringBuffer sBuf = new StringBuffer(); // for(int i = 0; i< allIDs.length; i++ ){ // if(gHash==null) // sBuf.append(new Integer(allIDs[i]).toString()); // else { // sBuf.append(gHash.get(allIDs[i])); // // //TEST // // String indexString = gHash.get(allIDs[i]); // // String indexString2 = searcher.getLabel(allIDs[i]); // // if(!indexString.equals(indexString2)){ // // System.out.println("lala"); // // } // // //END OF TEST // } // sBuf.append(" "); // } // String termString = sBuf.substring(0, sBuf.length()-1); // // Field subgraphField = new Field("subGraphs", termString, // Field.Store.NO, Field.Index.ANALYZED_NO_NORMS); // gDoc.add(subgraphField); return gDoc; }
public static void saveGDB(Graph[] graphs, GraphParser gParser, String fileName) throws IOException { // First Step: filter out graphs that are not in between the boundary BufferedWriter dbWriter = new BufferedWriter(new FileWriter(fileName)); int count = 0; float edgeNum = 0, nodeNum = 0; for (int i = 0; i < graphs.length; i++) { Graph theGraph = graphs[i]; dbWriter.write(count++ + " => " + gParser.serialize(theGraph) + "\n"); edgeNum += theGraph.getEdgeCount(); nodeNum += theGraph.getNodeCount(); } dbWriter.close(); // Intrigue java garbage collector Runtime r = Runtime.getRuntime(); r.gc(); // Write the meta information of the new file BufferedWriter metaWriter = new BufferedWriter(new FileWriter(fileName + "_Meta")); // 1. Processing Date SimpleDateFormat bartDateFormat = new SimpleDateFormat("EEEE-MMMM-dd-yyyy"); Date date = new Date(); metaWriter.write(bartDateFormat.format(date)); metaWriter.newLine(); // 2. Number of graphs in this file metaWriter.write("Number of Graphs:" + count); metaWriter.newLine(); metaWriter.write( "Average EdgeNum: " + (float) (edgeNum) / count + ", Average NodeNum: " + (float) (nodeNum) / count); // Close meta data file try { metaWriter.close(); } catch (IOException e) { e.printStackTrace(); } }
public static void changeFormatSimple( String SDFFileName, String SmilesFileName, GraphFactory graphFactory) throws IOException { // Open InputFile BufferedReader fileBufReader = new BufferedReader(new FileReader(SDFFileName)); // Open OutputFile BufferedWriter outputWriter = new BufferedWriter(new FileWriter(SmilesFileName)); SDFParserModified SDFParser = MyFactory.getSDFParserM(); SmilesParser smilesParser = MyFactory.getSmilesParser(); Graph oneGraph = null; double aveEdgeCount = 0; double aveNodeCount = 0; int index = 0; String spliter = " => "; while (fileBufReader.ready()) { try { oneGraph = SDFParser.parse(fileBufReader, graphFactory); } catch (ParseException e) { System.out.println("skip one graph"); while (fileBufReader.ready()) { String aLine = fileBufReader.readLine(); if (aLine.equals("$$$$")) break; } continue; } if (GraphConnectivityTester.isConnected(oneGraph)) { aveEdgeCount = (aveEdgeCount) * (index / ((double) index + 1)) + oneGraph.getEdgeCount() / ((double) index + 1); aveNodeCount = (aveNodeCount) * (index / ((double) index + 1)) + oneGraph.getNodeCount() / ((double) index + 1); if (index > 0) outputWriter.newLine(); outputWriter.write(index + spliter + smilesParser.serialize(oneGraph)); index++; } } // Close input File try { fileBufReader.close(); outputWriter.flush(); outputWriter.close(); } catch (IOException e) { e.printStackTrace(); } System.out.println( "In processor: changeFormat, " + index + " number of graphs" + "has been formated into Smiles Format"); // Intrigue java garbage collector Runtime r = Runtime.getRuntime(); r.gc(); // Write the meta information of the smile data file: BufferedWriter metaWriter = new BufferedWriter(new FileWriter(SmilesFileName + "_Meta")); // 1. Processing Date SimpleDateFormat bartDateFormat = new SimpleDateFormat("EEEE-MMMM-dd-yyyy"); Date date = new Date(); metaWriter.write(bartDateFormat.format(date)); metaWriter.newLine(); // 2. 
Number of graphs in this file metaWriter.write("Number of Graphs:" + index); metaWriter.write("Ave EdgeCount: " + aveEdgeCount + " Ave NodeCount: " + aveNodeCount); // Close meta data file try { metaWriter.flush(); metaWriter.close(); } catch (IOException e) { e.printStackTrace(); } }
/**
 * Randomly draws {@code 2 * chooseN} distinct graphs from a SMILES database file and splits
 * them into two new database files of up to {@code chooseN} graphs each.
 *
 * <p>The number of graphs {@code m} in the source is taken from the second line of {@code
 * smilesDBFile + "_Meta"} (the text after the first {@code ':'}). The chosen line numbers are
 * sorted; the {@code chooseN} smallest go to {@code chosenDBFile + "_1"}, the rest to {@code
 * chosenDBFile + "_2"}. Each output line is {@code "<i> => <smiles>"} where {@code i} is the
 * running position among all chosen graphs, so ids in the second file start at {@code
 * chooseN}.
 *
 * <p>A {@code "_Meta"} file is written next to each output file, holding the processing date,
 * the number of graphs in that file and their average edge/node counts.
 *
 * @param smilesDBFile path of the source database; its meta file must exist
 * @param chooseN number of graphs per output file; must satisfy {@code 0 <= 2 * chooseN <= m}
 * @param chosenDBFile path prefix of the two output files
 * @throws IllegalArgumentException if {@code chooseN} is negative or {@code 2 * chooseN}
 *     exceeds the graph count recorded in the meta file
 * @throws ParseException if a chosen SMILES string cannot be parsed
 * @throws IOException if a file cannot be read, written or closed, or the meta file lacks a
 *     usable graph-count line
 */
public static void randomlyChooseTwoDBGraph(String smilesDBFile, int chooseN, String chosenDBFile)
    throws ParseException, IOException {
  if (chooseN < 0) {
    throw new IllegalArgumentException("chooseN must not be negative: " + chooseN);
  }

  // First get the number of graphs in this database, which is stored in the meta file.
  int m;
  BufferedReader metaFile = new BufferedReader(new FileReader(smilesDBFile + "_Meta"));
  try {
    metaFile.readLine(); // line 1: processing date, not needed here
    String countLine = metaFile.readLine();
    String[] temp = (countLine == null) ? new String[0] : countLine.split(":");
    if (temp.length < 2) {
      throw new IOException("No graph count found in meta file " + smilesDBFile + "_Meta");
    }
    m = Integer.parseInt(temp[1].trim());
  } finally {
    metaFile.close();
  }

  // Compare against m / 2 so that 2 * chooseN cannot overflow before it is validated.
  if (chooseN > m / 2) {
    throw new IllegalArgumentException(
        "Cannot choose 2 * " + chooseN + " graphs from a database of " + m + " graphs");
  }
  int total = 2 * chooseN;

  // Randomly select `total` of the m line numbers: partial Fisher-Yates shuffle, then sort the
  // chosen prefix so the source file can be read in a single forward pass.
  int[] indexes = new int[m];
  for (int i = 0; i < m; i++) {
    indexes[i] = i;
  }
  Random rd = new Random();
  for (int i = 0; i < total; i++) {
    // nextInt avoids the float rounding of nextFloat() * range, which could yield index m.
    int j = i + rd.nextInt(m - i);
    int swapTemp = indexes[i];
    indexes[i] = indexes[j];
    indexes[j] = swapTemp;
  }
  Arrays.sort(indexes, 0, total);

  // Read the chosen graphs and save them into the two new files.
  String spliter = " => ";
  int count1 = 0, count2 = 0;
  int edgeNum1 = 0, nodeNum1 = 0;
  int edgeNum2 = 0, nodeNum2 = 0;
  BufferedReader fullDBReader = new BufferedReader(new FileReader(smilesDBFile));
  try {
    BufferedWriter chosenDBWriter1 = new BufferedWriter(new FileWriter(chosenDBFile + "_1"));
    try {
      BufferedWriter chosenDBWriter2 = new BufferedWriter(new FileWriter(chosenDBFile + "_2"));
      try {
        int fileLineIndex = 0;
        int i = 0; // number of chosen graphs written so far, across both files
        String aLine;
        while (i < total && (aLine = fullDBReader.readLine()) != null) {
          // indexes[0..total) is sorted and distinct, so the current line number can never
          // be ahead of indexes[i]; it either matches or the line is skipped.
          if (fileLineIndex++ != indexes[i]) {
            continue;
          }
          // Source line format: index => smiles
          String gString = aLine.split(spliter)[1];
          Graph g = MyFactory.getSmilesParser().parse(gString, MyFactory.getGraphFactory());
          if (i < chooseN) {
            edgeNum1 += g.getEdgeCount();
            nodeNum1 += g.getNodeCount();
            chosenDBWriter1.write(i + spliter + gString);
            chosenDBWriter1.newLine();
            count1++;
          } else {
            // NOTE(review): ids in the second file continue from chooseN instead of
            // restarting at 0; kept as in the original - confirm this is intended.
            edgeNum2 += g.getEdgeCount();
            nodeNum2 += g.getNodeCount();
            chosenDBWriter2.write(i + spliter + gString);
            chosenDBWriter2.newLine();
            count2++;
          }
          i++;
        }
      } finally {
        chosenDBWriter2.close();
      }
    } finally {
      chosenDBWriter1.close();
    }
  } finally {
    fullDBReader.close();
  }

  // GC hint retained from the original implementation; it does not affect the written files.
  Runtime.getRuntime().gc();

  // Each output file gets its own meta file, so the recorded graph count matches the number
  // of lines actually present in that file.
  writeChosenTwoDbMeta(chosenDBFile + "_1" + "_Meta", count1, edgeNum1, nodeNum1);
  writeChosenTwoDbMeta(chosenDBFile + "_2" + "_Meta", count2, edgeNum2, nodeNum2);
}

/** Writes the date, the graph count and the average edge/node counts to one meta file. */
private static void writeChosenTwoDbMeta(
    String metaFileName, int graphCount, int edgeNum, int nodeNum) throws IOException {
  BufferedWriter metaWriter = new BufferedWriter(new FileWriter(metaFileName));
  try {
    // 1. Processing date
    SimpleDateFormat bartDateFormat = new SimpleDateFormat("EEEE-MMMM-dd-yyyy");
    metaWriter.write(bartDateFormat.format(new Date()));
    metaWriter.newLine();
    // 2. Number of graphs in this file
    metaWriter.write("Number of Graphs:" + graphCount);
    metaWriter.newLine();
    // 3. Averages (NaN when graphCount is 0)
    metaWriter.write(
        "Average EdgeNum: "
            + (float) edgeNum / graphCount
            + ", Average NodeNum: "
            + (float) nodeNum / graphCount);
  } finally {
    // close() performs the final flush; let a failure propagate instead of hiding it.
    metaWriter.close();
  }
}