Example 1
  /**
   * Add a single, possibly compound index for the given field names and ensure all indexing
   * constraints are met.
   *
   * <p>This function generates a name for the new index.
   *
   * @param index The object that defines an index. Includes field list, name, type and options.
   * @return the name of the created index, or {@code null} if the index could not be created
   */
  @SuppressWarnings("unchecked")
  private String ensureIndexed(final Index index) {
    if (index == null) {
      return null;
    }

    if (index.indexType.equalsIgnoreCase(Index.TEXT_TYPE)) {
      if (!IndexManager.ftsAvailable(queue, database)) {
        logger.log(
            Level.SEVERE,
            "Text search not supported.  To add support for text "
                + "search, enable FTS compile options in SQLite.");
        return null;
      }
    }

    final List<String> fieldNamesList = removeDirectionsFromFields(index.fieldNames);

    for (String fieldName : fieldNamesList) {
      if (!validFieldName(fieldName)) {
        // Logging handled in validFieldName
        return null;
      }
    }

    // Check there are no duplicate field names in the array
    Set<String> uniqueNames = new HashSet<String>(fieldNamesList);
    if (uniqueNames.size() != fieldNamesList.size()) {
      String msg =
          String.format("Cannot create index with duplicated field names %s", index.fieldNames);
      logger.log(Level.SEVERE, msg);
      return null;
    }

    // Prepend _id and _rev if they are not already in the array
    if (!fieldNamesList.contains("_rev")) {
      fieldNamesList.add(0, "_rev");
    }

    if (!fieldNamesList.contains("_id")) {
      fieldNamesList.add(0, "_id");
    }

    // Check the index limit.  Limit is 1 for "text" indexes and unlimited for "json" indexes.
    // Then check whether the index already exists; return success if it does and is same,
    // else fail.
    try {
      Map<String, Object> existingIndexes = listIndexesInDatabaseQueue();
      if (indexLimitReached(index, existingIndexes)) {
        String msg =
            String.format("Index limit reached.  Cannot create index %s.", index.indexName);
        logger.log(Level.SEVERE, msg);
        return null;
      }
      if (existingIndexes != null && existingIndexes.get(index.indexName) != null) {
        Map<String, Object> existingIndex =
            (Map<String, Object>) existingIndexes.get(index.indexName);
        String existingType = (String) existingIndex.get("type");
        String existingSettings = (String) existingIndex.get("settings");
        List<String> existingFieldsList = (List<String>) existingIndex.get("fields");
        Set<String> existingFields = new HashSet<String>(existingFieldsList);
        Set<String> newFields = new HashSet<String>(fieldNamesList);
        if (existingFields.equals(newFields)
            && index.compareIndexTypeTo(existingType, existingSettings)) {
          boolean success =
              IndexUpdater.updateIndex(index.indexName, fieldNamesList, database, datastore, queue);
          return success ? index.indexName : null;
        }
      }
    } catch (ExecutionException e) {
      logger.log(Level.SEVERE, "Execution error encountered:", e);
      return null;
    } catch (InterruptedException e) {
      logger.log(Level.SEVERE, "Execution interrupted error encountered:", e);
      return null;
    }

    Future<Boolean> result =
        queue.submit(
            new Callable<Boolean>() {
              @Override
              public Boolean call() {
                Boolean transactionSuccess = true;
                database.beginTransaction();

                // Insert metadata table entries
                for (String fieldName : fieldNamesList) {
                  ContentValues parameters = new ContentValues();
                  parameters.put("index_name", index.indexName);
                  parameters.put("index_type", index.indexType);
                  parameters.put("index_settings", index.settingsAsJSON());
                  parameters.put("field_name", fieldName);
                  parameters.put("last_sequence", 0);
                  long rowId = database.insert(IndexManager.INDEX_METADATA_TABLE_NAME, parameters);
                  if (rowId < 0) {
                    transactionSuccess = false;
                    break;
                  }
                }

                // Create SQLite data structures to support the index
                // For JSON index type create a SQLite table and a SQLite index
                // For TEXT index type create a SQLite virtual table
                List<String> columnList = new ArrayList<String>();
                for (String field : fieldNamesList) {
                  columnList.add("\"" + field + "\"");
                }

                List<String> statements = new ArrayList<String>();
                if (index.indexType.equalsIgnoreCase(Index.TEXT_TYPE)) {
                  List<String> settingsList = new ArrayList<String>();
                  // Add text settings
                  for (String key : index.indexSettings.keySet()) {
                    settingsList.add(String.format("%s=%s", key, index.indexSettings.get(key)));
                  }
                  statements.add(
                      createVirtualTableStatementForIndex(
                          index.indexName, columnList, settingsList));
                } else {
                  statements.add(createIndexTableStatementForIndex(index.indexName, columnList));
                  statements.add(createIndexIndexStatementForIndex(index.indexName, columnList));
                }
                for (String statement : statements) {
                  try {
                    database.execSQL(statement);
                  } catch (SQLException e) {
                    String msg = String.format("Index creation error occurred (%s):", statement);
                    logger.log(Level.SEVERE, msg, e);
                    transactionSuccess = false;
                    break;
                  }
                }

                if (transactionSuccess) {
                  database.setTransactionSuccessful();
                }
                database.endTransaction();

                return transactionSuccess;
              }
            });

    // Update the new index if it's been created
    boolean success;
    try {
      success = result.get();
    } catch (ExecutionException e) {
      logger.log(Level.SEVERE, "Execution error encountered:", e);
      return null;
    } catch (InterruptedException e) {
      logger.log(Level.SEVERE, "Execution interrupted error encountered:", e);
      return null;
    }

    if (success) {
      success =
          IndexUpdater.updateIndex(index.indexName, fieldNamesList, database, datastore, queue);
    }

    return success ? index.indexName : null;
  }
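
The three statement builders invoked above (createIndexTableStatementForIndex, createIndexIndexStatementForIndex and createVirtualTableStatementForIndex) are not part of this example. Below is a minimal sketch of the two JSON-index builders; the "_t_example_index_" table-name prefix and the exact SQL shape are assumptions for illustration, not the library's actual statements.

  // Hedged sketch only: the real builders live alongside ensureIndexed.
  private String createIndexTableStatementForIndex(String indexName, List<String> columns) {
    // Columns get no type affinity, e.g.:
    //   CREATE TABLE "_t_example_index_basic" ( "_id" NONE, "_rev" NONE, "name" NONE )
    StringBuilder cols = new StringBuilder();
    for (String column : columns) {
      if (cols.length() > 0) {
        cols.append(", ");
      }
      cols.append(column).append(" NONE");
    }
    return String.format(
        "CREATE TABLE \"_t_example_index_%s\" ( %s )", indexName, cols.toString());
  }

  private String createIndexIndexStatementForIndex(String indexName, List<String> columns) {
    // A covering SQLite index over the same columns, named after the table.
    StringBuilder cols = new StringBuilder();
    for (String column : columns) {
      if (cols.length() > 0) {
        cols.append(", ");
      }
      cols.append(column);
    }
    return String.format(
        "CREATE INDEX \"_t_example_index_%s_index\" ON \"_t_example_index_%s\" ( %s )",
        indexName, indexName, cols.toString());
  }

The text-index variant would instead emit a CREATE VIRTUAL TABLE statement (likely an FTS virtual table) carrying the settings list collected above; its exact shape depends on the FTS module compiled into SQLite.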
Example 2
 public EventRepoReport report() {
   EventRepoReport report = new EventRepoReport().appendLine(Indexing.class.getSimpleName());
   report.appendReport("EventIndexing", eventIndexing.report());
   report.appendLine("IndexUpdater: " + indexUpdater.getClass().getSimpleName());
   return report;
 }
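
A small caller-side sketch of how the report might be consumed for diagnostics; everything here except report() itself is an assumption (a java.util.logging logger is used purely for illustration):

 // Hypothetical diagnostic hook; assumes EventRepoReport renders its appended
 // lines via toString().
 private static final java.util.logging.Logger LOG =
     java.util.logging.Logger.getLogger("event-repo-diagnostics");

 void logIndexingState(Indexing indexing) {
   LOG.info(indexing.report().toString());
 }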
Example 3
 /**
  * A new event has entered the surrounding {@link EventStore}; let this {@link Indexing} react
  * appropriately.
  *
  * @param eventId The id of the new event
  * @param ed The {@link EventSerializer} that is used for creating serialized data for the given
  *     event type
  * @param event The event that has entered the {@link EventStore}
  */
 public void onNewEvent(long eventId, EventSerializer ed, Object event) {
   indexUpdater.onNewEvent(eventId, ed, event);
 }
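
The example above only delegates; how the surrounding EventStore drives this hook is not shown. A minimal caller sketch, assuming hypothetical serializers, log and indexing collaborators (only onNewEvent comes from the example):

 // Hypothetical EventStore append path; serializerFor(...), write(...) and
 // serialize(...) are assumed methods, named here for illustration only.
 public long append(Object event) {
   EventSerializer serializer = serializers.serializerFor(event.getClass());
   long eventId = log.write(serializer.serialize(event)); // persist first
   indexing.onNewEvent(eventId, serializer, event);       // then index
   return eventId;
 }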
Example 4
 /**
  * Stops this {@link Indexing} and closes all associated files. This indexing is no longer usable
  * after this method has been called.
  */
 public void stop() {
   eventIndexing.stop();
   eventFieldIndexing.stop();
   indexUpdater.stop();
 }
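
One caveat: if an early stop() call throws, the later components are never stopped. A defensive variant, sketched under the assumption that the three components can be stopped independently (requires Java 8 for the method references):

 // Sketch: attempt to stop every component even if one throws, then rethrow
 // the first failure.
 public void stop() {
   RuntimeException first = null;
   for (Runnable closer : java.util.Arrays.<Runnable>asList(
       eventIndexing::stop, eventFieldIndexing::stop, indexUpdater::stop)) {
     try {
       closer.run();
     } catch (RuntimeException e) {
       if (first == null) {
         first = e;
       }
     }
   }
   if (first != null) {
     throw first;
   }
 }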
Example 5
  @Override
  public void analyzeInternal(ASTNode ast) throws SemanticException {
    isLocal = false;
    isOverWrite = false;
    Tree fromTree = ast.getChild(0);
    Tree tableTree = ast.getChild(1);

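    // Children 0 and 1 are always the source path and the target table; any
    // remaining children are the optional LOCAL and OVERWRITE keywords:
    // 2 children -> neither flag, 3 -> exactly one of them, 4 -> both.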
    if (ast.getChildCount() == 4) {
      isLocal = true;
      isOverWrite = true;
    }

    if (ast.getChildCount() == 3) {
      if (ast.getChild(2).getText().equalsIgnoreCase("local")) {
        isLocal = true;
      } else {
        isOverWrite = true;
      }
    }

    // initialize load path
    URI fromURI;
    try {
      String fromPath = stripQuotes(fromTree.getText());
      fromURI = initializeFromURI(fromPath);
    } catch (IOException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    } catch (URISyntaxException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    }

    // initialize destination table/partition
    tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

    if (ts.tableHandle.isOffline()) {
      throw new SemanticException(
          ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
    }

    if (ts.tableHandle.isView()) {
      throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
    }
    if (ts.tableHandle.isNonNative()) {
      throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }

    if (ts.tableHandle.isStoredAsSubDirectories()) {
      throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
    }

    URI toURI =
        (ts.partHandle != null)
            ? ts.partHandle.getDataLocation()
            : ts.tableHandle.getDataLocation();

    List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
    if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
      throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
    }

    // make sure the arguments make sense
    applyConstraints(fromURI, toURI, fromTree, isLocal);

    Task<? extends Serializable> rTask = null;

    // create copy work
    if (isLocal) {
      // If the LOCAL keyword is specified we always make a copy. This might
      // seem redundant when the Hive warehouse is also located in the local
      // file system, but that's just a test case.
      String copyURIStr = ctx.getExternalTmpFileURI(toURI);
      URI copyURI = URI.create(copyURIStr);
      rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr), conf);
      fromURI = copyURI;
    }

    // create final load/move work

    String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
    Map<String, String> partSpec = ts.getPartSpec();
    if (partSpec == null) {
      partSpec = new LinkedHashMap<String, String>();
      outputs.add(new WriteEntity(ts.tableHandle));
    } else {
      try {
        Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
        if (part != null) {
          if (part.isOffline()) {
            throw new SemanticException(
                ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(ts.tableName + ":" + part.getName()));
          }
          outputs.add(new WriteEntity(part));
        } else {
          outputs.add(new WriteEntity(ts.tableHandle));
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
    }

    LoadTableDesc loadTableWork =
        new LoadTableDesc(
            fromURI.toString(),
            loadTmpPath,
            Utilities.getTableDesc(ts.tableHandle),
            partSpec,
            isOverWrite);

    Task<? extends Serializable> childTask =
        TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf);
    if (rTask != null) {
      rTask.addDependentTask(childTask);
    } else {
      rTask = childTask;
    }

    rootTasks.add(rTask);

    // The user asked for stats to be collected.
    // Some stats, like the number of rows, require a scan of the data.
    // However, other stats, like the number of files, do not require a complete scan.
    // Update the stats which do not require a complete scan.
    Task<? extends Serializable> statTask = null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsWork statDesc = new StatsWork(loadTableWork);
      statDesc.setNoStatsAggregator(true);
      statDesc.setClearAggregatorStats(true);
      statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
      statTask = TaskFactory.get(statDesc, conf);
    }

    // HIVE-3334 has been filed for load file with index auto update
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
      IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
      try {
        List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();

        for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
          // LOAD DATA will either have a copy & move or just a move;
          // we always want the update to be dependent on the move
          childTask.addDependentTask(updateTask);
          if (statTask != null) {
            updateTask.addDependentTask(statTask);
          }
        }
      } catch (HiveException e) {
        console.printInfo(
            "WARNING: could not auto-update stale indexes; indexes may be out of sync");
      }
    } else if (statTask != null) {
      childTask.addDependentTask(statTask);
    }
  }
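
Taken together, the analyzer produces one of a few task chains: an optional copy task (LOCAL loads) feeding the move, index-update tasks hanging off the move when auto-update is enabled, and the stats task last. A standalone sketch of that wiring, where Task is a hypothetical stand-in for Hive's Task<? extends Serializable>:

  // Hypothetical stand-in, kept to the single method this sketch needs.
  interface Task {
    void addDependentTask(Task t);
  }

  // Mirrors the wiring above; returns the root task to be scheduled.
  static Task wire(Task copy, Task move, java.util.List<Task> indexUpdates, Task stats) {
    Task root = move;
    if (copy != null) {             // LOCAL load: copy first, then move
      copy.addDependentTask(move);
      root = copy;
    }
    if (!indexUpdates.isEmpty()) {
      for (Task update : indexUpdates) {
        move.addDependentTask(update);    // updates always depend on the move
        if (stats != null) {
          update.addDependentTask(stats); // stats wait for the index updates
        }
      }
    } else if (stats != null) {
      move.addDependentTask(stats);       // no updates: stats follow the move
    }
    return root;
  }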