@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  isLocal = false;
  isOverWrite = false;
  Tree fromTree = ast.getChild(0);
  Tree tableTree = ast.getChild(1);

  if (ast.getChildCount() == 4) {
    isLocal = true;
    isOverWrite = true;
  }

  if (ast.getChildCount() == 3) {
    if (ast.getChild(2).getText().equalsIgnoreCase("local")) {
      isLocal = true;
    } else {
      isOverWrite = true;
    }
  }

  // initialize load path
  URI fromURI;
  try {
    String fromPath = stripQuotes(fromTree.getText());
    fromURI = initializeFromURI(fromPath);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  } catch (URISyntaxException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  }

  // initialize destination table/partition
  tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

  if (ts.tableHandle.isOffline()) {
    throw new SemanticException(
        ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
  }
  if (ts.tableHandle.isView()) {
    throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
  }
  if (ts.tableHandle.isNonNative()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
  }
  if (ts.tableHandle.isStoredAsSubDirectories()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
  }

  URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation()
      : ts.tableHandle.getDataLocation();

  List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
  if ((parts != null && parts.size() > 0)
      && (ts.partSpec == null || ts.partSpec.size() == 0)) {
    throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
  }

  // make sure the arguments make sense
  applyConstraints(fromURI, toURI, fromTree, isLocal);

  Task<? extends Serializable> rTask = null;

  // create copy work
  if (isLocal) {
    // if the local keyword is specified - we will always make a copy. this
    // might seem redundant in the case that the hive warehouse is also
    // located in the local file system - but that's just a test case.
    String copyURIStr = ctx.getExternalTmpFileURI(toURI);
    URI copyURI = URI.create(copyURIStr);
    rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr), conf);
    fromURI = copyURI;
  }

  // create final load/move work
  String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
  Map<String, String> partSpec = ts.getPartSpec();
  if (partSpec == null) {
    partSpec = new LinkedHashMap<String, String>();
    outputs.add(new WriteEntity(ts.tableHandle));
  } else {
    try {
      Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
      if (part != null) {
        if (part.isOffline()) {
          throw new SemanticException(
              ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(ts.tableName + ":" + part.getName()));
        }
        outputs.add(new WriteEntity(part));
      } else {
        outputs.add(new WriteEntity(ts.tableHandle));
      }
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }

  LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(), loadTmpPath,
      Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);

  Task<? extends Serializable> childTask = TaskFactory.get(
      new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf);
  if (rTask != null) {
    rTask.addDependentTask(childTask);
  } else {
    rTask = childTask;
  }

  rootTasks.add(rTask);

  // The user asked for stats to be collected.
  // Some stats, like the number of rows, require a scan of the data;
  // others, like the number of files, do not require a complete scan.
  // Update the stats which do not require a complete scan.
  Task<? extends Serializable> statTask = null;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    StatsWork statDesc = new StatsWork(loadTableWork);
    statDesc.setNoStatsAggregator(true);
    statDesc.setClearAggregatorStats(true);
    statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    statTask = TaskFactory.get(statDesc, conf);
  }

  // HIVE-3334 has been filed for load file with index auto update
  if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
    IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
    try {
      List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
      for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
        // LOAD DATA will either have a copy & move or just a move;
        // we always want the update to be dependent on the move
        childTask.addDependentTask(updateTask);
        if (statTask != null) {
          updateTask.addDependentTask(statTask);
        }
      }
    } catch (HiveException e) {
      console.printInfo(
          "WARNING: could not auto-update stale indexes, indexes are not in sync with base table");
    }
  } else if (statTask != null) {
    childTask.addDependentTask(statTask);
  }
}
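// Descriptive note (added here, not in the original source): the wiring above produces
// the following task DAG, with bracketed stages present only when their condition holds:
//
//   [CopyWork, if LOCAL] -> MoveWork -> [IndexUpdater tasks, if HIVEINDEXAUTOUPDATE]
//                                    -> [StatsWork, if HIVESTATSAUTOGATHER]
//
// When both optional features are enabled, StatsWork is chained after each index-update
// task rather than directly after the move, so stats reflect the fully updated state.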
private List<HiveLockObj> getLockObjects(QueryPlan plan, Database db,
    Table t, Partition p, HiveLockMode mode) throws LockException {
  List<HiveLockObj> locks = new LinkedList<HiveLockObj>();

  HiveLockObject.HiveLockObjectData lockData =
      new HiveLockObject.HiveLockObjectData(plan.getQueryId(),
          String.valueOf(System.currentTimeMillis()),
          "IMPLICIT",
          plan.getQueryStr());

  if (db != null) {
    locks.add(new HiveLockObj(new HiveLockObject(db.getName(), lockData), mode));
    return locks;
  }

  if (t != null) {
    locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
    mode = HiveLockMode.SHARED;
    locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
    return locks;
  }

  if (p != null) {
    if (!(p instanceof DummyPartition)) {
      locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
    }

    // All the parents are locked in shared mode
    mode = HiveLockMode.SHARED;

    // For dummy partitions, only the partition name is needed
    String name = p.getName();
    if (p instanceof DummyPartition) {
      name = p.getName().split("@")[2];
    }

    String partialName = "";
    String[] partns = name.split("/");
    int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
    Map<String, String> partialSpec = new LinkedHashMap<String, String>();
    for (int idx = 0; idx < len; idx++) {
      String partn = partns[idx];
      partialName += partn;
      String[] nameValue = partn.split("=");
      assert (nameValue.length == 2);
      partialSpec.put(nameValue[0], nameValue[1]);
      try {
        locks.add(new HiveLockObj(new HiveLockObject(
            new DummyPartition(p.getTable(),
                p.getTable().getDbName() + "/"
                    + MetaStoreUtils.encodeTableName(p.getTable().getTableName()) + "/"
                    + partialName,
                partialSpec),
            lockData), mode));
        partialName += "/";
      } catch (HiveException e) {
        throw new LockException(e.getMessage());
      }
    }

    locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
    locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
  }
  return locks;
}
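// Illustrative walk-through (assumed example, not taken from the source): requesting an
// EXCLUSIVE lock on partition ds=2024-01-01/hr=10 of table default.t yields, in order:
//   1. the partition itself                  -> EXCLUSIVE
//   2. the partial spec ds=2024-01-01        -> SHARED (as a DummyPartition; for real
//      partitions the loop stops one component short of the full partition name)
//   3. the table default.t                   -> SHARED
//   4. the database "default"                -> SHARED
// Taking every parent in SHARED mode lets concurrent queries lock sibling partitions
// while still conflicting with an EXCLUSIVE lock on the table or database.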