public void importFromFile(String filePath) throws IOException { Map<String, Long> cache = new HashMap<String, Long>(COUNT); final File storeDir = new File(this.path); org.apache.commons.io.FileUtils.deleteDirectory(storeDir); BatchInserter batchInserter = new BatchInserterImpl(storeDir.getAbsolutePath()); final BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(batchInserter); final BatchInserterIndex index = indexProvider.nodeIndex("nodes", MapUtil.stringMap("type", "exact")); BufferedReader reader = new BufferedReader(new FileReader(filePath)); String line = null; int nodes = 0; long time = System.currentTimeMillis(); long batchTime = time; while ((line = reader.readLine()) != null) { final String[] nodeNames = line.split("\\|"); final String name = nodeNames[0]; final Map<String, Object> props = MapUtil.map("name", name); final long node = batchInserter.createNode(props); index.add(node, props); cache.put(name, node); nodes++; if ((nodes % REPORT_COUNT) == 0) { System.out.printf( "%d nodes created. Took %d %n", nodes, (System.currentTimeMillis() - batchTime)); batchTime = System.currentTimeMillis(); } } System.out.println("Creating nodes took " + (System.currentTimeMillis() - time) / 1000); index.flush(); reader.close(); reader = new BufferedReader(new FileReader(filePath)); int rels = 0; time = System.currentTimeMillis(); batchTime = time; String relationshipType = "KNOWS"; while ((line = reader.readLine()) != null) { final String[] nodeNames = line.split("\\|"); final String name = nodeNames[0]; // final Long from = index.get("name", name).getSingle(); Long from = cache.get(name); for (int j = 1; j < nodeNames.length; j++) { // final Long to = index.get("name", nodeNames[j]).getSingle(); final Long to = cache.get(name); batchInserter.createRelationship( from, to, DynamicRelationshipType.withName(relationshipType), null); } rels++; if ((rels % REPORT_COUNT) == 0) { System.out.printf( "%d relationships created. 
Took %d %n", rels, (System.currentTimeMillis() - batchTime)); batchTime = System.currentTimeMillis(); } } System.out.println("Creating relationships took " + (System.currentTimeMillis() - time) / 1000); indexProvider.shutdown(); batchInserter.shutdown(); }
/**
 * Creates a manual index and registers it under {@code indexName}.
 *
 * <p>When no index parameters are supplied the index type defaults to
 * {@code Neo4jBatchTokens.EXACT}. Classes assignable to {@code Vertex} are backed by
 * a node index; every other element class is backed by a relationship index.
 *
 * @param indexName name under which the index is created and stored in {@code indices}
 * @param indexClass element class the index holds
 * @param indexParameters optional configuration, converted by {@code generateParameterMap}
 * @return the newly created index
 */
public <T extends Element> Index<T> createManualIndex(
        final String indexName, final Class<T> indexClass, final Parameter... indexParameters) {
    final Map<String, String> config = generateParameterMap(indexParameters);
    if (indexParameters.length == 0) {
        config.put(Neo4jBatchTokens.TYPE, Neo4jBatchTokens.EXACT);
    }
    config.put(Neo4jBatchTokens.BLUEPRINTS_TYPE, Index.Type.MANUAL.toString());

    final boolean forVertices = Vertex.class.isAssignableFrom(indexClass);
    final Neo4jBatchIndex<T> created =
            new Neo4jBatchIndex<T>(
                    this,
                    forVertices
                            ? indexProvider.nodeIndex(indexName, config)
                            : indexProvider.relationshipIndex(indexName, config),
                    indexName,
                    indexClass);

    this.indices.put(indexName, created);
    return created;
}
/**
 * Creates an automatic index and registers it under {@code indexName}.
 *
 * <p>When no index parameters are supplied the index type defaults to
 * {@code Neo4jBatchTokens.EXACT}. The auto-index keys are stored in the index
 * configuration via {@code makeAutoIndexKeys}.
 *
 * <p>A single {@code Vertex.class.isAssignableFrom(indexClass)} test decides both
 * which kind of backing index is created (node vs. relationship) and which
 * automatic-index registry the result is stored in, so the two can never disagree
 * for a {@code Vertex} subtype.
 *
 * @param indexName name under which the index is created and registered
 * @param indexClass element class the index holds
 * @param indexKeys keys to index automatically
 * @param indexParameters optional configuration, converted by {@code generateParameterMap}
 * @return the newly created automatic index
 */
public <T extends Element> AutomaticIndex<T> createAutomaticIndex(
        final String indexName,
        final Class<T> indexClass,
        final Set<String> indexKeys,
        final Parameter... indexParameters) {
    final Map<String, String> map = generateParameterMap(indexParameters);
    if (indexParameters.length == 0) {
        map.put(Neo4jBatchTokens.TYPE, Neo4jBatchTokens.EXACT);
    }
    map.put(Neo4jBatchTokens.BLUEPRINTS_TYPE, Index.Type.AUTOMATIC.toString());
    map.put(Neo4jBatchTokens.BLUEPRINTS_AUTOKEYS, makeAutoIndexKeys(indexKeys));

    final Neo4jBatchAutomaticIndex<T> index;
    if (Vertex.class.isAssignableFrom(indexClass)) {
        index =
                new Neo4jBatchAutomaticIndex<T>(
                        this,
                        indexProvider.nodeIndex(indexName, map),
                        indexName,
                        indexClass,
                        indexKeys);
        this.indices.put(indexName, index);
        this.automaticVertexIndices.put(
                indexName, (Neo4jBatchAutomaticIndex<Neo4jBatchVertex>) index);
    } else {
        index =
                new Neo4jBatchAutomaticIndex<T>(
                        this,
                        indexProvider.relationshipIndex(indexName, map),
                        indexName,
                        indexClass,
                        indexKeys);
        this.indices.put(indexName, index);
        this.automaticEdgeIndices.put(
                indexName, (Neo4jBatchAutomaticIndex<Neo4jBatchEdge>) index);
    }
    return index;
}
public static void main(String[] args) { File currentFolder = new File("."); File[] files = currentFolder.listFiles(); BatchInserter inserter = null; BatchInserterIndexProvider indexProvider = null; // ---------------------------------------------------------------------------------- // ---------------------initializing node type properties---------------------------- genomeElementProperties.put(GenomeElementNode.NODE_TYPE_PROPERTY, GenomeElementNode.NODE_TYPE); geneProperties.put(GeneNode.NODE_TYPE_PROPERTY, GeneNode.NODE_TYPE); cdsProperties.put(CDSNode.NODE_TYPE_PROPERTY, CDSNode.NODE_TYPE); miscRnaProperties.put(MiscRNANode.NODE_TYPE_PROPERTY, MiscRNANode.NODE_TYPE); mRnaProperties.put(MRNANode.NODE_TYPE_PROPERTY, MRNANode.NODE_TYPE); ncRnaProperties.put(NcRNANode.NODE_TYPE_PROPERTY, NcRNANode.NODE_TYPE); rRnaProperties.put(RRNANode.NODE_TYPE_PROPERTY, RRNANode.NODE_TYPE); tmRnaProperties.put(TmRNANode.NODE_TYPE_PROPERTY, TmRNANode.NODE_TYPE); tRnaProperties.put(TRNANode.NODE_TYPE_PROPERTY, TRNANode.NODE_TYPE); // ---------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------- try { // This block configures the logger with handler and formatter fh = new FileHandler("ImportGenbank.log", false); SimpleFormatter formatter = new SimpleFormatter(); fh.setFormatter(formatter); logger.addHandler(fh); logger.setLevel(Level.ALL); // create the batch inserter inserter = new BatchInserterImpl( CommonData.DATABASE_FOLDER, BatchInserterImpl.loadProperties(CommonData.PROPERTIES_FILE_NAME)); // create the batch index service indexProvider = new LuceneBatchInserterIndexProvider(inserter); // -----------------create batch indexes---------------------------------- // ---------------------------------------------------------------------- BatchInserterIndex genomeElementVersionIndex = indexProvider.nodeIndex( GenomeElementNode.GENOME_ELEMENT_VERSION_INDEX, 
MapUtil.stringMap(PROVIDER_ST, LUCENE_ST, TYPE_ST, EXACT_ST)); for (File file : files) { if (file.getName().endsWith(".gbff")) { BufferedReader reader = new BufferedReader(new FileReader(file)); String line = null; while ((line = reader.readLine()) != null) { // this is the first line where the locus is String accessionSt = ""; String definitionSt = ""; String versionSt = ""; String commentSt = ""; StringBuilder seqStBuilder = new StringBuilder(); ArrayList<String> cdsList = new ArrayList<String>(); ArrayList<String> geneList = new ArrayList<String>(); ArrayList<String> miscRnaList = new ArrayList<String>(); ArrayList<String> mRnaList = new ArrayList<String>(); ArrayList<String> ncRnaList = new ArrayList<String>(); ArrayList<String> rRnaList = new ArrayList<String>(); ArrayList<String> tmRnaList = new ArrayList<String>(); ArrayList<String> tRnaList = new ArrayList<String>(); boolean originFound = false; // Now I get all the lines till I reach the string '//' do { boolean readLineFlag = true; if (line.startsWith(GBCommon.LOCUS_STR)) { // do nothing right now } else if (line.startsWith(GBCommon.ACCESSION_STR)) { accessionSt = line.split(GBCommon.ACCESSION_STR)[1].trim(); } else if (line.startsWith(GBCommon.VERSION_STR)) { versionSt = line.split(GBCommon.VERSION_STR)[1].trim().split(" ")[0]; } else if (line.startsWith(GBCommon.DEFINITION_STR)) { definitionSt += line.split(GBCommon.DEFINITION_STR)[1].trim(); do { line = reader.readLine(); if (line.startsWith(" ")) { definitionSt += line.trim(); } } while (line.startsWith(" ")); readLineFlag = false; } else if (line.startsWith(GBCommon.COMMENT_STR)) { commentSt += line.split(GBCommon.COMMENT_STR)[1].trim(); do { line = reader.readLine(); if (line.startsWith(" ")) { commentSt += "\n" + line.trim(); } } while (line.startsWith(" ")); readLineFlag = false; } else if (line.startsWith(GBCommon.FEATURES_STR)) { do { line = reader.readLine(); if (line.trim().startsWith(GBCommon.CDS_STR)) { String positionsSt = ""; positionsSt 
+= line.trim().split(GBCommon.CDS_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } cdsList.add(positionsSt); } else if (line.trim().startsWith(GBCommon.GENE_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.GENE_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } geneList.add(positionsSt); } else if (line.trim().startsWith(GBCommon.MISC_RNA_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.MISC_RNA_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } miscRnaList.add(positionsSt); } else if (line.trim().startsWith(GBCommon.TM_RNA_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.TM_RNA_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } tmRnaList.add(positionsSt); } else if (line.trim().startsWith(GBCommon.R_RNA_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.R_RNA_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } rRnaList.add(positionsSt); } else if (line.trim().startsWith(GBCommon.M_RNA_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.M_RNA_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } mRnaList.add(positionsSt); } else if (line.trim().startsWith(GBCommon.NC_RNA_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.NC_RNA_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } ncRnaList.add(positionsSt); } else if 
(line.trim().startsWith(GBCommon.T_RNA_STR)) { String positionsSt = ""; positionsSt += line.trim().split(GBCommon.T_RNA_STR)[1].trim(); line = reader.readLine(); while (!line.trim().startsWith("/")) { positionsSt += line.trim(); line = reader.readLine(); } tRnaList.add(positionsSt); } } while (line.startsWith(" ")); readLineFlag = false; } else if (line.startsWith(GBCommon.ORIGIN_STR)) { // sequence originFound = true; do { line = reader.readLine(); String[] tempArray = line.trim().split(" "); for (int i = 1; i < tempArray.length; i++) { seqStBuilder.append(tempArray[i]); } } while (line.startsWith(" ")); readLineFlag = false; } if (readLineFlag) { line = reader.readLine(); } } while (line != null && !line.startsWith(GBCommon.LAST_LINE_STR)); // -----we only save the data when the sequence is found------------ if (originFound) { System.out.println("accessionSt = " + accessionSt); System.out.println("versionSt = " + versionSt); System.out.println("definitionSt = " + definitionSt); System.out.println("commentSt = " + commentSt); System.out.println("sequence.length = " + seqStBuilder.toString().length()); System.out.println("geneList = " + geneList); System.out.println("cdsList = " + cdsList); System.out.println("miscRnaList = " + miscRnaList); System.out.println("mRnaList = " + mRnaList); System.out.println("ncRnaList = " + ncRnaList); System.out.println("rRnaList = " + rRnaList); System.out.println("tmRnaList = " + tmRnaList); System.out.println("tRnaList = " + tRnaList); // --------create genome element node-------------- long genomeElementId = createGenomeElementNode( versionSt, commentSt, definitionSt, inserter, genomeElementVersionIndex); // -----------genes----------------- for (String genePositionsSt : geneList) { geneProperties.put(GeneNode.POSITIONS_PROPERTY, genePositionsSt); long geneId = inserter.createNode(geneProperties); inserter.createRelationship(genomeElementId, geneId, genomeElementGeneRel, null); } // -----------CDS----------------- for (String 
cdsPositionsSt : cdsList) { cdsProperties.put(CDSNode.POSITIONS_PROPERTY, cdsPositionsSt); long cdsID = inserter.createNode(cdsProperties); inserter.createRelationship(genomeElementId, cdsID, genomeElementCDSRel, null); } // -----------misc rna----------------- for (String miscRnaPositionsSt : miscRnaList) { miscRnaProperties.put(MiscRNANode.POSITIONS_PROPERTY, miscRnaPositionsSt); long miscRnaID = inserter.createNode(miscRnaProperties); inserter.createRelationship( genomeElementId, miscRnaID, genomeElementMiscRnaRel, null); } // -----------m rna----------------- for (String mRnaPositionsSt : mRnaList) { mRnaProperties.put(MRNANode.POSITIONS_PROPERTY, mRnaPositionsSt); long mRnaID = inserter.createNode(mRnaProperties); inserter.createRelationship(genomeElementId, mRnaID, genomeElementMRnaRel, null); } // -----------nc rna----------------- for (String ncRnaPositionsSt : ncRnaList) { ncRnaProperties.put(NcRNANode.POSITIONS_PROPERTY, ncRnaPositionsSt); long ncRnaID = inserter.createNode(ncRnaProperties); inserter.createRelationship(genomeElementId, ncRnaID, genomeElementNcRnaRel, null); } // -----------r rna----------------- for (String rRnaPositionsSt : rRnaList) { rRnaProperties.put(RRNANode.POSITIONS_PROPERTY, rRnaPositionsSt); long rRnaID = inserter.createNode(rRnaProperties); inserter.createRelationship(genomeElementId, rRnaID, genomeElementRRnaRel, null); } // -----------tm rna----------------- for (String tmRnaPositionsSt : tmRnaList) { tmRnaProperties.put(TmRNANode.POSITIONS_PROPERTY, tmRnaPositionsSt); long tmRnaID = inserter.createNode(tmRnaProperties); inserter.createRelationship(genomeElementId, tmRnaID, genomeElementTmRnaRel, null); } // -----------t rna----------------- for (String tRnaPositionsSt : tRnaList) { tRnaProperties.put(TRNANode.POSITIONS_PROPERTY, tRnaPositionsSt); long tRnaID = inserter.createNode(tRnaProperties); inserter.createRelationship(genomeElementId, tRnaID, genomeElementTRnaRel, null); } } } } } } catch (Exception e) { 
logger.log(Level.SEVERE, e.getMessage()); StackTraceElement[] trace = e.getStackTrace(); for (StackTraceElement stackTraceElement : trace) { logger.log(Level.SEVERE, stackTraceElement.toString()); } } finally { // shutdown, makes sure all changes are written to disk indexProvider.shutdown(); inserter.shutdown(); // closing logger file handler fh.close(); } }