/**
 * Add a single, possibly compound index for the given field names and ensure all indexing
 * constraints are met.
 *
 * <p>This function generates a name for the new index.
 *
 * @param index The object that defines an index. Includes field list, name, type and options.
 * @return name of created index
 */
@SuppressWarnings("unchecked")
private String ensureIndexed(final Index index) {
    if (index == null) {
        return null;
    }

    if (index.indexType.equalsIgnoreCase("text")) {
        if (!IndexManager.ftsAvailable(queue, database)) {
            logger.log(Level.SEVERE, "Text search not supported. To add support for text "
                    + "search, enable FTS compile options in SQLite.");
            return null;
        }
    }

    final List<String> fieldNamesList = removeDirectionsFromFields(index.fieldNames);

    for (String fieldName : fieldNamesList) {
        if (!validFieldName(fieldName)) {
            // Logging handled in validFieldName
            return null;
        }
    }

    // Check there are no duplicate field names in the array
    Set<String> uniqueNames = new HashSet<String>(fieldNamesList);
    if (uniqueNames.size() != fieldNamesList.size()) {
        String msg = String.format("Cannot create index with duplicated field names %s",
                index.fieldNames);
        logger.log(Level.SEVERE, msg);
        return null;
    }

    // Prepend _id and _rev if they're not in the array
    if (!fieldNamesList.contains("_rev")) {
        fieldNamesList.add(0, "_rev");
    }

    if (!fieldNamesList.contains("_id")) {
        fieldNamesList.add(0, "_id");
    }

    // Check the index limit. The limit is 1 for "text" indexes and unlimited for "json"
    // indexes. Then check whether the index already exists; return success if it does and
    // is the same, else fail.
    try {
        Map<String, Object> existingIndexes = listIndexesInDatabaseQueue();
        if (indexLimitReached(index, existingIndexes)) {
            String msg = String.format("Index limit reached. Cannot create index %s.",
                    index.indexName);
            logger.log(Level.SEVERE, msg);
            return null;
        }
        if (existingIndexes != null && existingIndexes.get(index.indexName) != null) {
            Map<String, Object> existingIndex =
                    (Map<String, Object>) existingIndexes.get(index.indexName);
            String existingType = (String) existingIndex.get("type");
            String existingSettings = (String) existingIndex.get("settings");
            List<String> existingFieldsList = (List<String>) existingIndex.get("fields");
            Set<String> existingFields = new HashSet<String>(existingFieldsList);
            Set<String> newFields = new HashSet<String>(fieldNamesList);
            if (existingFields.equals(newFields)
                    && index.compareIndexTypeTo(existingType, existingSettings)) {
                boolean success = IndexUpdater.updateIndex(index.indexName,
                        fieldNamesList,
                        database,
                        datastore,
                        queue);
                return success ? index.indexName : null;
            }
        }
    } catch (ExecutionException e) {
        logger.log(Level.SEVERE, "Execution error encountered:", e);
        return null;
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Execution interrupted error encountered:", e);
        return null;
    }

    Future<Boolean> result = queue.submit(new Callable<Boolean>() {
        @Override
        public Boolean call() {
            Boolean transactionSuccess = true;
            database.beginTransaction();

            // Insert metadata table entries
            for (String fieldName : fieldNamesList) {
                ContentValues parameters = new ContentValues();
                parameters.put("index_name", index.indexName);
                parameters.put("index_type", index.indexType);
                parameters.put("index_settings", index.settingsAsJSON());
                parameters.put("field_name", fieldName);
                parameters.put("last_sequence", 0);
                long rowId = database.insert(IndexManager.INDEX_METADATA_TABLE_NAME,
                        parameters);
                if (rowId < 0) {
                    transactionSuccess = false;
                    break;
                }
            }

            // Create SQLite data structures to support the index.
            // For the JSON index type, create a SQLite table and a SQLite index.
            // For the TEXT index type, create a SQLite virtual table.
            List<String> columnList = new ArrayList<String>();
            for (String field : fieldNamesList) {
                columnList.add("\"" + field + "\"");
            }

            List<String> statements = new ArrayList<String>();
            if (index.indexType.equalsIgnoreCase(Index.TEXT_TYPE)) {
                List<String> settingsList = new ArrayList<String>();
                // Add text settings
                for (String key : index.indexSettings.keySet()) {
                    settingsList.add(String.format("%s=%s", key,
                            index.indexSettings.get(key)));
                }
                statements.add(createVirtualTableStatementForIndex(index.indexName,
                        columnList,
                        settingsList));
            } else {
                statements.add(createIndexTableStatementForIndex(index.indexName,
                        columnList));
                statements.add(createIndexIndexStatementForIndex(index.indexName,
                        columnList));
            }

            for (String statement : statements) {
                try {
                    database.execSQL(statement);
                } catch (SQLException e) {
                    String msg = String.format("Index creation error occurred (%s):",
                            statement);
                    logger.log(Level.SEVERE, msg, e);
                    transactionSuccess = false;
                    break;
                }
            }

            if (transactionSuccess) {
                database.setTransactionSuccessful();
            }
            database.endTransaction();

            return transactionSuccess;
        }
    });

    // Update the new index if it's been created
    boolean success;
    try {
        success = result.get();
    } catch (ExecutionException e) {
        logger.log(Level.SEVERE, "Execution error encountered:", e);
        return null;
    } catch (InterruptedException e) {
        logger.log(Level.SEVERE, "Execution interrupted error encountered:", e);
        return null;
    }

    if (success) {
        success = IndexUpdater.updateIndex(index.indexName,
                fieldNamesList,
                database,
                datastore,
                queue);
    }

    return success ? index.indexName : null;
}
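// For illustration only: a minimal sketch of what the two JSON-index helpers invoked
// above might emit. The real createIndexTableStatementForIndex and
// createIndexIndexStatementForIndex are not shown in this snippet, so the table/index
// naming scheme and the NONE column affinity below are assumptions, not the actual
// implementation. Assumes java.util.List is imported; columns arrive pre-quoted, as
// built in the Callable above.
private static String sketchIndexTableStatement(String indexName, List<String> columns) {
    // Assumed naming scheme: one shadow table per index, keyed by document id.
    StringBuilder sql = new StringBuilder("CREATE TABLE \"_t_index_" + indexName + "\" ( docid");
    for (String column : columns) {
        sql.append(", ").append(column).append(" NONE"); // NONE affinity is an assumption
    }
    return sql.append(" )").toString();
}

private static String sketchIndexIndexStatement(String indexName, List<String> columns) {
    // One covering SQLite index over docid plus all indexed columns of the shadow table.
    StringBuilder sql = new StringBuilder("CREATE INDEX \"_i_index_" + indexName
            + "\" ON \"_t_index_" + indexName + "\" ( docid");
    for (String column : columns) {
        sql.append(", ").append(column);
    }
    return sql.append(" )").toString();
}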
public EventRepoReport report() {
    EventRepoReport report = new EventRepoReport().appendLine(Indexing.class.getSimpleName());
    report.appendReport("EventIndexing", eventIndexing.report());
    report.appendLine("IndexUpdater: " + indexUpdater.getClass().getSimpleName());
    return report;
}
/**
 * A new event has entered the surrounding {@link EventStore}; let this {@link Indexing} react
 * appropriately.
 *
 * @param eventId <code>long</code>
 * @param ed      The {@link EventSerializer} that is used for creating serialized data for the
 *                given event type
 * @param event   The event that has entered the {@link EventStore}
 */
public void onNewEvent(long eventId, EventSerializer ed, Object event) {
    indexUpdater.onNewEvent(eventId, ed, event);
}
/**
 * Stops this {@link Indexing} and closes all associated files. This indexing is no longer
 * usable after this method has been called.
 */
public void stop() {
    eventIndexing.stop();
    eventFieldIndexing.stop();
    indexUpdater.stop();
}
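// For illustration only: the expected lifecycle of an Indexing instance, assuming the
// surrounding EventStore wires it up roughly like this. The event id, serializer and
// event values here are hypothetical placeholders.
void lifecycleSketch(Indexing indexing, EventSerializer serializer) {
    // 1. While the store is open, every appended event is forwarded so the
    //    indexes stay in sync with the event log.
    long eventId = 42L;          // assumed to be assigned by the EventStore
    Object event = new Object(); // stands in for the domain event being appended
    indexing.onNewEvent(eventId, serializer, event);

    // 2. Diagnostics can be collected at any time while running.
    System.out.println(indexing.report());

    // 3. On shutdown, stop() closes all index files; the instance must not be
    //    used afterwards.
    indexing.stop();
}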
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
    isLocal = false;
    isOverWrite = false;
    Tree fromTree = ast.getChild(0);
    Tree tableTree = ast.getChild(1);

    if (ast.getChildCount() == 4) {
        isLocal = true;
        isOverWrite = true;
    }

    if (ast.getChildCount() == 3) {
        if (ast.getChild(2).getText().toLowerCase().equals("local")) {
            isLocal = true;
        } else {
            isOverWrite = true;
        }
    }

    // initialize load path
    URI fromURI;
    try {
        String fromPath = stripQuotes(fromTree.getText());
        fromURI = initializeFromURI(fromPath);
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    } catch (URISyntaxException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    }

    // initialize destination table/partition
    tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

    if (ts.tableHandle.isOffline()) {
        throw new SemanticException(
                ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
    }
    if (ts.tableHandle.isView()) {
        throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
    }
    if (ts.tableHandle.isNonNative()) {
        throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }
    if (ts.tableHandle.isStoredAsSubDirectories()) {
        throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
    }

    URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation()
            : ts.tableHandle.getDataLocation();

    List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
    if ((parts != null && parts.size() > 0)
            && (ts.partSpec == null || ts.partSpec.size() == 0)) {
        throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
    }

    // make sure the arguments make sense
    applyConstraints(fromURI, toURI, fromTree, isLocal);

    Task<? extends Serializable> rTask = null;

    // create copy work
    if (isLocal) {
        // If the LOCAL keyword is specified, we always make a copy. This might seem
        // redundant when the hive warehouse is also located in the local file system,
        // but that's just a test case.
        String copyURIStr = ctx.getExternalTmpFileURI(toURI);
        URI copyURI = URI.create(copyURIStr);
        rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr), conf);
        fromURI = copyURI;
    }

    // create final load/move work
    String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
    Map<String, String> partSpec = ts.getPartSpec();
    if (partSpec == null) {
        partSpec = new LinkedHashMap<String, String>();
        outputs.add(new WriteEntity(ts.tableHandle));
    } else {
        try {
            Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
            if (part != null) {
                if (part.isOffline()) {
                    throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(
                            ts.tableName + ":" + part.getName()));
                }
                outputs.add(new WriteEntity(part));
            } else {
                outputs.add(new WriteEntity(ts.tableHandle));
            }
        } catch (HiveException e) {
            throw new SemanticException(e);
        }
    }

    LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(), loadTmpPath,
            Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);

    Task<? extends Serializable> childTask = TaskFactory.get(
            new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf);
    if (rTask != null) {
        rTask.addDependentTask(childTask);
    } else {
        rTask = childTask;
    }

    rootTasks.add(rTask);

    // The user asked for stats to be collected.
    // Some stats, like the number of rows, require a scan of the data; other stats, like
    // the number of files, do not. Update only the stats that do not require a complete
    // scan.
    Task<? extends Serializable> statTask = null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsWork statDesc = new StatsWork(loadTableWork);
        statDesc.setNoStatsAggregator(true);
        statDesc.setClearAggregatorStats(true);
        statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        statTask = TaskFactory.get(statDesc, conf);
    }

    // HIVE-3334 has been filed for load file with index auto update
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
        IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
        try {
            List<Task<? extends Serializable>> indexUpdateTasks =
                    indexUpdater.generateUpdateTasks();
            for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
                // LOAD DATA will either have a copy & move or just a move; we always
                // want the update to be dependent on the move.
                childTask.addDependentTask(updateTask);
                if (statTask != null) {
                    updateTask.addDependentTask(statTask);
                }
            }
        } catch (HiveException e) {
            console.printInfo(
                    "WARNING: could not auto-update stale indexes, indexes are not in sync");
        }
    } else if (statTask != null) {
        childTask.addDependentTask(statTask);
    }
}
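// For illustration only: the task graph the method above builds for
// "LOAD DATA LOCAL INPATH ..." when both HIVEINDEXAUTOUPDATE and
// HIVESTATSAUTOGATHER are enabled. This is a sketch of the dependency wiring,
// not output of the real planner:
//
//   CopyWork (local file -> tmp location)        [only when LOCAL is given]
//        |
//   MoveWork (tmp -> table/partition data location)
//        |
//   index update task(s), one per stale index
//        |
//   StatsWork (file-level stats only; no data scan)
//
// Without LOCAL, the CopyWork node is absent and MoveWork is the root task;
// with index auto-update disabled, StatsWork hangs directly off MoveWork.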