コード例 #1
0
ファイル: SearchUtils.java プロジェクト: hopcroft/databus
  /**
   * Rebuilds the search indexes from the tables stored in NoSql.
   *
   * <p>Calls {@code deleteExistingCores()} and {@code indexSchemas()}, then walks every {@code
   * SecureTable}. Each table's metadata document is queued (via {@code indexTable}) and saved to
   * "databusmeta" in batches. For searchable tables every row returned by {@code select c from
   * <table> as c} is additionally turned into a document and saved in batches of more than {@code
   * REINDEX_BATCH_SIZE}.
   *
   * <p>Reading is best-effort: a table whose row cannot be loaded, or a searchable table whose
   * query fails, is reported and skipped so that the remaining tables are still indexed.
   *
   * @throws IOException propagated from the indexing helpers
   * @throws SAXException propagated from the indexing helpers
   * @throws ParserConfigurationException propagated from the indexing helpers
   * @throws SolrServerException propagated from the indexing helpers
   */
  @Util
  public static void reindex()
      throws IOException, SAXException, ParserConfigurationException, SolrServerException {

    deleteExistingCores();
    indexSchemas();

    Cursor<KeyValue<SecureTable>> tablesCursor = SecureTable.findAllCursor(NoSql.em());
    int i = 0;
    long docsindexed = 0;
    long startTime = System.currentTimeMillis();
    Collection<SolrInputDocument> solrDocs = new ArrayList<SolrInputDocument>();
    while (tablesCursor.next()) {
      // Periodically clear the entity manager so it does not keep growing during the walk.
      if (++i % 200 == 0) NoSql.em().clear();
      KeyValue<SecureTable> kv = tablesCursor.getCurrent();

      // very important to ignore any error and continue indexing.  If an index has gotten
      // corrupted (which happens...) this will throw com.alvazan.orm.api.exc.RowNotFoundException
      // and will kill the entire reindex.
      SecureTable table;
      try {
        table = kv.getValue();
      } catch (RowNotFoundException rnfe) {
        // Always report this at error level; it must not depend on info logging being enabled.
        log.error(
            "got a corrupt index while reindexing, ignoring the error and continuing with indexing of other data.");
        continue;
      }
      if (table == null) continue;

      DboTableMeta meta = table.getTableMeta();

      SearchUtils.indexTable(table, meta, solrDocs);

      if (table.isSearchable()) {
        log.info("found a searchable table " + table.getName() + " indexing it.");
        String sql = "select c from " + table.getTableName() + " as c";
        Collection<SolrInputDocument> tablesolrDocs = new ArrayList<SolrInputDocument>();

        try {
          QueryResult result =
              NoSql.em().getTypedSession().createQueryCursor(sql, SqlPullProcessor.BATCH_SIZE);
          Iterator<List<TypedRow>> cursor = result.getAllViewsIter().iterator();

          while (true) {
            // cursor.hasNext() can throw, in which case that item must be skipped while the
            // cursor keeps going until it runs out; getNext() wraps that and returns null at the
            // end.
            List<TypedRow> typedRows = getNext(cursor);
            if (typedRows == null) break;
            for (TypedRow prow : typedRows) {
              SearchPosting.addSolrDataDoc(prow, table, tablesolrDocs);
            }
            if (tablesolrDocs.size() > REINDEX_BATCH_SIZE) {
              // Count the row documents being flushed, not the pending metadata batch.
              docsindexed += tablesolrDocs.size();
              printReindexProgress("in a searchable table", docsindexed, startTime);
              SearchPosting.saveSolr("reindex", tablesolrDocs, null);
              tablesolrDocs = new ArrayList<SolrInputDocument>();
            }
          }
          // Flush whatever is left for this table; take the size first in case saving alters it.
          int remaining = tablesolrDocs.size();
          SearchPosting.saveSolr("reindex", tablesolrDocs, null);
          docsindexed += remaining;
        } catch (Exception e) {
          // Deliberately best-effort: report the failure and move on to the next table.
          System.out.println(
              "got an exception while indexing a searchable table with the query (probably a corrupt index in playorm):");
          System.out.println(sql);
          log.warn("reindex query failed, skipping the rest of this table: " + sql, e);
        }
      }
      if (solrDocs.size() > REINDEX_BATCH_SIZE) {
        docsindexed += solrDocs.size();
        printReindexProgress("in metadata", docsindexed, startTime);
        SearchPosting.saveSolr("reindex", solrDocs, "databusmeta");
        solrDocs = new ArrayList<SolrInputDocument>();
      }
    }
    // Flush the metadata documents that never reached a full batch.
    if (solrDocs.size() > 0) {
      docsindexed += solrDocs.size();
      printReindexProgress("during finalization", docsindexed, startTime);
      SearchPosting.saveSolr("reindex", solrDocs, "databusmeta");
    }
  }

  /**
   * Prints a one-line progress report for {@link #reindex()} to standard output.
   *
   * <p>The rate is computed from milliseconds and the elapsed time is clamped to at least 1 ms, so
   * the report can never divide by zero, even within the first second of a run.
   *
   * @param phase text placed after "hit solr doc batch size ", e.g. "in metadata"
   * @param docsIndexed number of documents counted so far
   * @param startTime value of {@code System.currentTimeMillis()} when the reindex started
   */
  private static void printReindexProgress(String phase, long docsIndexed, long startTime) {
    long elapsedMillis = Math.max(1L, System.currentTimeMillis() - startTime);
    long docsPerSec = docsIndexed * 1000L / elapsedMillis;
    System.out.println(
        "hit solr doc batch size "
            + phase
            + ", "
            + docsIndexed
            + " docs so far, "
            + elapsedMillis
            + " millis elapsed "
            + docsPerSec
            + " docs per sec.");
  }
コード例 #2
0
ファイル: SearchUtils.java プロジェクト: hopcroft/databus
  /**
   * Builds the metadata search document for one table and appends it to {@code solrDocs}.
   *
   * <p>The document carries the table name as {@code id}, the literal type {@code "table"}, the
   * creator's username and schema name/description when present, the description, a searchable
   * flag, the set of column names, and an {@code allTerms_texts} field combining the id, type,
   * description, schema fields and column names. Null values are left out of {@code
   * allTerms_texts}.
   *
   * <p>The "databusmeta" core is created if needed before the document is built, and for a
   * searchable table a core named after the table is created if needed afterwards. The document
   * itself is only queued in {@code solrDocs}; this method does not save it.
   *
   * @param t the table to describe
   * @param tableMeta metadata supplying the table's columns
   * @param solrDocs collection the new document is appended to
   * @throws IOException declared for the core-creation helper
   * @throws SAXException declared for the core-creation helper
   * @throws ParserConfigurationException declared for the core-creation helper
   * @throws SolrServerException declared for the core-creation helper
   */
  public static void indexTable(
      SecureTable t, DboTableMeta tableMeta, Collection<SolrInputDocument> solrDocs)
      throws IOException, SAXException, ParserConfigurationException, SolrServerException {
    SolrServer solrServer = Search.getSolrServer();
    // Add this new table to the meta index...
    createCoreIfNeeded("databusmeta", "databusmeta", solrServer);

    String tableName = t.getTableName();
    String description = t.getDescription();
    String schemaName = null;
    String schemaDescription = null;

    SolrInputDocument doc = new SolrInputDocument();

    // Special fields tracking the type of record and the primary key.
    doc.addField("id", tableName);
    doc.addField("type", "table");
    if (t.getCreator() != null) doc.addField("creator_texts", t.getCreator().getUsername());
    doc.addField("description_texts", description);
    if (t.getSchema() != null) {
      schemaName = t.getSchema().getSchemaName();
      schemaDescription = t.getSchema().getDescription();
      doc.addField("database_texts", schemaName);
      doc.addField("databaseDescription_texts", schemaDescription);
    }
    boolean searchable = t.isSearchable();
    doc.addField("isSearchable_texts", String.valueOf(searchable));

    Set<String> columnsSet = new HashSet<String>();
    for (DboColumnMeta m : tableMeta.getAllColumns()) {
      String metaColumnName = m.getColumnName();
      SdiColumn sdicol = t.getNameToField().get(metaColumnName);
      // A column without a mapped SdiColumn must not abort indexing with an NPE; fall back to
      // the name from the table metadata.
      // NOTE(review): assumes the metadata name is an acceptable search term here - confirm.
      columnsSet.add(sdicol != null ? sdicol.getColumnName() : metaColumnName);
    }
    doc.addField("column_texts", columnsSet);

    // Collect every searchable term, skipping values that are absent so the combined field never
    // contains a null element.
    Set<String> allTermsSet = new HashSet<String>();
    String[] scalarTerms = {tableName, "table", description, schemaName, schemaDescription};
    for (String term : scalarTerms) {
      if (term != null) allTermsSet.add(term);
    }
    allTermsSet.addAll(columnsSet);
    doc.addField("allTerms_texts", allTermsSet);

    solrDocs.add(doc);
    // Create the core for data from this table to be indexed into when data is posted to this table
    if (searchable) createCoreIfNeeded(tableName, tableName, solrServer);
  }