public static void indexTable( SecureTable t, DboTableMeta tableMeta, Collection<SolrInputDocument> solrDocs) throws IOException, SAXException, ParserConfigurationException, SolrServerException { // Solr server instance SolrServer solrServer = Search.getSolrServer(); // Add this new table to the meta index... createCoreIfNeeded("databusmeta", "databusmeta", solrServer); SolrInputDocument doc = new SolrInputDocument(); // Add special fields to track the type of record an the primary // key. doc.addField("id", t.getTableName()); doc.addField("type", "table"); if (t.getCreator() != null) doc.addField("creator_texts", t.getCreator().getUsername()); doc.addField("description_texts", t.getDescription()); if (t.getSchema() != null) { doc.addField("database_texts", t.getSchema().getSchemaName()); doc.addField("databaseDescription_texts", t.getSchema().getDescription()); } if (t.isSearchable()) doc.addField("isSearchable_texts", "true"); else doc.addField("isSearchable_texts", "false"); Set<String> allTermsSet = new HashSet<String>(); Set<String> columnsSet = new HashSet<String>(); for (DboColumnMeta m : tableMeta.getAllColumns()) { SdiColumn sdicol = t.getNameToField().get(m.getColumnName()); columnsSet.add(sdicol.getColumnName()); } doc.addField("column_texts", columnsSet); allTermsSet.add((String) doc.getField("id").getValue()); allTermsSet.add((String) doc.getField("type").getValue()); allTermsSet.add((String) doc.getField("description_texts").getValue()); allTermsSet.addAll(columnsSet); if (doc.getField("database_texts") != null) { allTermsSet.add((String) doc.getField("database_texts").getValue()); allTermsSet.add((String) doc.getField("databaseDescription_texts").getValue()); } doc.addField("allTerms_texts", allTermsSet); solrDocs.add(doc); // Create the core for data from this table to be indexed into when data is posted to this table if (t.isSearchable()) createCoreIfNeeded(t.getTableName(), t.getTableName(), solrServer); }
@Util public static void reindex() throws IOException, SAXException, ParserConfigurationException, SolrServerException { deleteExistingCores(); indexSchemas(); Cursor<KeyValue<SecureTable>> tablesCursor = SecureTable.findAllCursor(NoSql.em()); int i = 0; // very important to ignore any error and continue indexing. If an index has gotten // corrupted (which happens...) this will throw com.alvazan.orm.api.exc.RowNotFoundException // and will kill the entire reindex. long docsindexed = 0; long startTime = System.currentTimeMillis() - 1; Collection<SolrInputDocument> solrDocs = new ArrayList<SolrInputDocument>(); while (tablesCursor.next()) { if (++i % 200 == 0) NoSql.em().clear(); KeyValue<SecureTable> kv = tablesCursor.getCurrent(); try { if (kv.getValue() == null) continue; } catch (RowNotFoundException rnfe) { if (log.isInfoEnabled()) log.error( "got a corrupt index while reindexing, ignoring the error and continuing with indexing of other data."); // rnfe.printStackTrace(); continue; } SecureTable table = kv.getValue(); DboTableMeta meta = table.getTableMeta(); SearchUtils.indexTable(table, meta, solrDocs); if (table.isSearchable()) { log.info("found a searchable table " + table.getName() + " indexing it."); String sql = "select c from " + table.getTableName() + " as c"; Collection<SolrInputDocument> tablesolrDocs = new ArrayList<SolrInputDocument>(); try { QueryResult result = NoSql.em().getTypedSession().createQueryCursor(sql, SqlPullProcessor.BATCH_SIZE); Iterator<List<TypedRow>> cursor = result.getAllViewsIter().iterator(); while (true) { // I hate this, but cursor.hasNext() can throw an exception which means we need to skip // over // that item but continue on with the cursor till it runs out: List<TypedRow> typedRows = getNext(cursor); if (typedRows == null) break; for (TypedRow prow : typedRows) { SearchPosting.addSolrDataDoc(prow, table, tablesolrDocs); } if (tablesolrDocs.size() > REINDEX_BATCH_SIZE) { docsindexed += solrDocs.size(); System.out.println( "hit solr doc batch size in a searchable table, " + docsindexed + " docs so far, " + (System.currentTimeMillis() - startTime) + " millis elapsed " + (docsindexed / ((System.currentTimeMillis() - startTime) / 1000)) + " docs per sec."); SearchPosting.saveSolr("reindex", tablesolrDocs, null); tablesolrDocs = new ArrayList<SolrInputDocument>(); } } SearchPosting.saveSolr("reindex", tablesolrDocs, null); docsindexed += solrDocs.size(); } catch (Exception e) { System.out.println( "got an exception while indexing a searchable table with the query (probably a corrupt index in playorm):"); System.out.println(sql); // e.printStackTrace(); } } if (solrDocs.size() > REINDEX_BATCH_SIZE) { docsindexed += solrDocs.size(); System.out.println( "hit solr doc batch size in metadata, " + docsindexed + " docs so far, " + (System.currentTimeMillis() - startTime) + " millis elapsed " + (docsindexed / ((System.currentTimeMillis() - startTime) / 1000)) + " docs per sec."); SearchPosting.saveSolr("reindex", solrDocs, "databusmeta"); solrDocs = new ArrayList<SolrInputDocument>(); } } if (solrDocs.size() > 0) { docsindexed += solrDocs.size(); System.out.println( "hit solr doc batch size during finalization, " + docsindexed + " docs so far, " + (System.currentTimeMillis() - startTime) + " millis elapsed " + (docsindexed / ((System.currentTimeMillis() - startTime) / 1000)) + " docs per sec."); SearchPosting.saveSolr("reindex", solrDocs, "databusmeta"); solrDocs = new ArrayList<SolrInputDocument>(); } }