/** * stolen from JobControlCompiler TODO: refactor it to share this * * @param physicalPlan * @param poLoad * @param jobConf * @return * @throws java.io.IOException */ private static JobConf configureLoader(PhysicalPlan physicalPlan, POLoad poLoad, JobConf jobConf) throws IOException { // 这部分似乎没用 Job job = new Job(jobConf); LoadFunc loadFunc = poLoad.getLoadFunc(); loadFunc.setLocation(poLoad.getLFile().getFileName(), job); // stolen from JobControlCompiler ArrayList<FileSpec> pigInputs = new ArrayList<FileSpec>(); // Store the inp filespecs pigInputs.add(poLoad.getLFile()); ArrayList<List<OperatorKey>> inpTargets = Lists.newArrayList(); ArrayList<String> inpSignatures = Lists.newArrayList(); ArrayList<Long> inpLimits = Lists.newArrayList(); // Store the target operators for tuples read // from this input List<PhysicalOperator> loadSuccessors = physicalPlan.getSuccessors(poLoad); List<OperatorKey> loadSuccessorsKeys = Lists.newArrayList(); if (loadSuccessors != null) { for (PhysicalOperator loadSuccessor : loadSuccessors) { loadSuccessorsKeys.add(loadSuccessor.getOperatorKey()); } } inpTargets.add(loadSuccessorsKeys); inpSignatures.add(poLoad.getSignature()); inpLimits.add(poLoad.getLimit()); jobConf.set("pig.inputs", ObjectSerializer.serialize(pigInputs)); jobConf.set("pig.inpTargets", ObjectSerializer.serialize(inpTargets)); jobConf.set("pig.inpSignatures", ObjectSerializer.serialize(inpSignatures)); jobConf.set("pig.inpLimits", ObjectSerializer.serialize(inpLimits)); return jobConf; }
/**
 * Reports whether a tuple is currently available.
 *
 * <p>On the first call since {@code hasNextCalled} was last cleared, the flag is set and, if no
 * tuple is buffered yet, one is fetched from {@code loadFunc}. Later calls only inspect the
 * buffered tuple.
 *
 * @return {@code true} if {@code tuple} is non-null after the optional fetch
 * @throws RuntimeException wrapping any {@link IOException} raised while fetching
 */
@Override
public boolean hasNext() {
    if (hasNextCalled) {
        // Already probed: answer from the buffered tuple without touching the loader.
        return tuple != null;
    }

    // Mark before fetching so the flag is set even if the fetch fails.
    hasNextCalled = true;
    if (tuple == null) {
        try {
            tuple = loadFunc.getNext();
        } catch (IOException ioe) {
            // The iterator contract has no checked exceptions, so rethrow unchecked.
            throw new RuntimeException(ioe);
        }
    }
    return tuple != null;
}
/** * . Given a path, which may represent a glob pattern, a directory, comma separated files/glob * patterns or a file, this method finds the set of relevant metadata files on the storage system. * The algorithm for finding the metadata file is as follows: * * <p>For each object represented by the path (either directly, or via a glob): If object is a * directory, and path/metaname exists, use that as the metadata file. Else if parentPath/metaname * exists, use that as the metadata file. * * <p>Resolving conflicts, merging the metadata, etc, is not handled by this method and should be * taken care of by downstream code. * * <p> * * @param path Path, as passed in to a LoadFunc (may be a Hadoop glob) * @param metaname Metadata file designation, such as .pig_schema or .pig_stats * @param conf configuration object * @return Set of element descriptors for all metadata files associated with the files on the * path. */ protected Set<ElementDescriptor> findMetaFile(String path, String metaname, Configuration conf) throws IOException { Set<ElementDescriptor> metaFileSet = new HashSet<ElementDescriptor>(); String[] locations = LoadFunc.getPathStrings(path); for (String loc : locations) { DataStorage storage; storage = new HDataStorage(new Path(loc).toUri(), ConfigurationUtil.toProperties(conf)); String fullPath = FileLocalizer.fullPath(loc, storage); if (storage.isContainer(fullPath)) { ElementDescriptor metaFilePath = storage.asElement(fullPath, metaname); if (exists(metaFilePath)) { metaFileSet.add(metaFilePath); } } else { ElementDescriptor[] descriptors = storage.asCollection(loc); for (ElementDescriptor descriptor : descriptors) { ContainerDescriptor container = null; if (descriptor instanceof HFile) { Path descriptorPath = ((HPath) descriptor).getPath(); Path parent = descriptorPath.getParent(); container = new HDirectory((HDataStorage) storage, parent); } else { // descriptor instanceof HDirectory container = (HDirectory) descriptor; } // if no custom schema, try the 
parent directory ElementDescriptor metaFilePath = storage.asElement(container, metaname); if (exists(metaFilePath)) { metaFileSet.add(metaFilePath); } } } } return metaFileSet; }
/**
 * Converts a store location to an absolute path by delegating to
 * {@code LoadFunc.getAbsolutePath}.
 *
 * @param location store location as supplied by the caller
 * @param curDir directory passed through to {@code LoadFunc.getAbsolutePath}
 * @return the value returned by {@code LoadFunc.getAbsolutePath(location, curDir)}
 * @throws IOException propagated from {@code LoadFunc.getAbsolutePath}
 */
@Override
public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
    final String absoluteLocation = LoadFunc.getAbsolutePath(location, curDir);
    return absoluteLocation;
}
/**
 * Converts a store location to an absolute path by delegating to
 * {@code LoadFunc.getAbsolutePath}, and logs the inputs and the result at debug level.
 *
 * @param location store location as supplied by the caller
 * @param curDir directory passed through to {@code LoadFunc.getAbsolutePath}
 * @return the value returned by {@code LoadFunc.getAbsolutePath(location, curDir)}
 * @throws IOException propagated from {@code LoadFunc.getAbsolutePath}
 */
@Override
public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
    final String resolved = LoadFunc.getAbsolutePath(location, curDir);

    // Arguments are passed as an explicit array, matching the original call form.
    final Object[] logArgs = {location, curDir, resolved};
    log.debug("relToAbsPathForStoreLocation({}, {}) --> {}", logArgs);

    return resolved;
}