Esempio n. 1
0
  /**
   * Writes the input description of a single load operator into the supplied job configuration.
   *
   * <p>Adapted from JobControlCompiler. TODO: refactor so both places share one implementation.
   *
   * <p>Four entries are stored, each holding a serialized one-element list: {@code pig.inputs}
   * (the load's file spec), {@code pig.inpTargets} (operator keys of the load's successors in the
   * plan), {@code pig.inpSignatures} (the load's signature) and {@code pig.inpLimits} (the load's
   * limit).
   *
   * @param physicalPlan plan that is asked for the successors of {@code poLoad}
   * @param poLoad load operator providing the load function, file spec, signature and limit
   * @param jobConf configuration that receives the four serialized entries
   * @return the {@code jobConf} instance that was passed in
   * @throws java.io.IOException if any of the underlying calls fails with one
   */
  private static JobConf configureLoader(PhysicalPlan physicalPlan, POLoad poLoad, JobConf jobConf)
      throws IOException {

    // NOTE(review): the original author marked this step as apparently unnecessary. The Job built
    // here is not referenced again after setLocation; confirm before removing it.
    Job hadoopJob = new Job(jobConf);
    poLoad.getLoadFunc().setLocation(poLoad.getLFile().getFileName(), hadoopJob);

    // The remainder mirrors JobControlCompiler. The containers are declared as ArrayList rather
    // than List -- presumably so they can be handed to ObjectSerializer.serialize; TODO confirm.
    ArrayList<FileSpec> inputSpecs = new ArrayList<FileSpec>();
    inputSpecs.add(poLoad.getLFile());

    ArrayList<List<OperatorKey>> targetKeysPerInput = new ArrayList<List<OperatorKey>>();
    ArrayList<String> signatures = new ArrayList<String>();
    ArrayList<Long> limits = new ArrayList<Long>();

    // Collect the keys of the operators that consume tuples read from this input. A load with no
    // successors (null result) contributes an empty key list.
    List<PhysicalOperator> consumers = physicalPlan.getSuccessors(poLoad);
    List<OperatorKey> consumerKeys = new ArrayList<OperatorKey>();
    if (consumers != null) {
      for (PhysicalOperator consumer : consumers) {
        consumerKeys.add(consumer.getOperatorKey());
      }
    }

    targetKeysPerInput.add(consumerKeys);
    signatures.add(poLoad.getSignature());
    limits.add(poLoad.getLimit());

    // Serialize and store one entry at a time, in this fixed order.
    jobConf.set("pig.inputs", ObjectSerializer.serialize(inputSpecs));
    jobConf.set("pig.inpTargets", ObjectSerializer.serialize(targetKeysPerInput));
    jobConf.set("pig.inpSignatures", ObjectSerializer.serialize(signatures));
    jobConf.set("pig.inpLimits", ObjectSerializer.serialize(limits));

    return jobConf;
  }
 /**
  * Reports whether a tuple is currently buffered, reading one from {@code loadFunc} if needed.
  *
  * <p>A read is attempted only when {@code hasNextCalled} is still false and no tuple is
  * buffered; the flag is set on that first call so later calls just report the buffered state.
  * (Presumably the flag and the buffer are reset by the matching {@code next()} -- not visible
  * here, verify.)
  *
  * @return {@code true} if {@code tuple} is non-null after this call
  * @throws RuntimeException wrapping any {@link IOException} raised while reading
  */
 @Override
 public boolean hasNext() {
   if (hasNextCalled) {
     // Already probed since the flag was last cleared: report the buffered state only.
     return tuple != null;
   }
   hasNextCalled = true;

   if (tuple != null) {
     return true;
   }

   try {
     tuple = loadFunc.getNext();
   } catch (IOException e) {
     // The overridden signature declares no checked exception, so rethrow unchecked.
     throw new RuntimeException(e);
   }
   return tuple != null;
 }
  /**
   * Finds the metadata files associated with a load path.
   *
   * <p>The path may be a single file, a directory, a glob pattern, or a comma separated list of
   * those. It is split into individual locations, and each location is handled as follows:
   *
   * <ul>
   *   <li>If the location itself is a container (directory), {@code location/metaname} is used
   *       when it exists.
   *   <li>Otherwise the location is expanded into its matching elements. For a file element the
   *       parent directory is searched for {@code metaname}; for any other element the element
   *       itself is treated as the directory to search.
   * </ul>
   *
   * <p>Resolving conflicts, merging the metadata, etc., is not handled by this method and should
   * be taken care of by downstream code.
   *
   * @param path Path, as passed in to a LoadFunc (may be a Hadoop glob)
   * @param metaname Metadata file designation, such as .pig_schema or .pig_stats
   * @param conf configuration object
   * @return Set of element descriptors for all metadata files associated with the files on the
   *     path; empty if none exist.
   * @throws IOException if any of the underlying storage calls fails with one
   */
  protected Set<ElementDescriptor> findMetaFile(String path, String metaname, Configuration conf)
      throws IOException {
    Set<ElementDescriptor> found = new HashSet<ElementDescriptor>();

    for (String location : LoadFunc.getPathStrings(path)) {
      // A storage handle is created per location, from that location's own URI.
      DataStorage storage =
          new HDataStorage(new Path(location).toUri(), ConfigurationUtil.toProperties(conf));
      String resolved = FileLocalizer.fullPath(location, storage);

      if (storage.isContainer(resolved)) {
        // The location is a directory: look for the metadata file directly inside it.
        ElementDescriptor candidate = storage.asElement(resolved, metaname);
        if (exists(candidate)) {
          found.add(candidate);
        }
        continue;
      }

      // Not a directory: expand the (unresolved) location and inspect every match.
      for (ElementDescriptor match : storage.asCollection(location)) {
        // Files are looked up via their parent directory; anything else is assumed to be an
        // HDirectory already.
        ContainerDescriptor searchDir =
            (match instanceof HFile)
                ? new HDirectory((HDataStorage) storage, ((HPath) match).getPath().getParent())
                : (HDirectory) match;

        ElementDescriptor candidate = storage.asElement(searchDir, metaname);
        if (exists(candidate)) {
          found.add(candidate);
        }
      }
    }

    return found;
  }
 /**
  * Converts a store location into an absolute path by delegating to
  * {@code LoadFunc.getAbsolutePath}.
  *
  * @param location store location as given by the caller
  * @param curDir directory passed through to {@code LoadFunc.getAbsolutePath}
  * @return the value returned by {@code LoadFunc.getAbsolutePath(location, curDir)}
  * @throws IOException if the delegate throws one
  */
 @Override
 public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
   final String absoluteLocation = LoadFunc.getAbsolutePath(location, curDir);
   return absoluteLocation;
 }
Esempio n. 5
0
 /**
  * Converts a store location into an absolute path by delegating to
  * {@code LoadFunc.getAbsolutePath}, and logs the inputs and the result at debug level.
  *
  * @param location store location as given by the caller
  * @param curDir directory passed through to {@code LoadFunc.getAbsolutePath}
  * @return the value returned by {@code LoadFunc.getAbsolutePath(location, curDir)}
  * @throws IOException if the delegate throws one
  */
 @Override
 public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException {
   final String absoluteLocation = LoadFunc.getAbsolutePath(location, curDir);
   // Arguments are passed as an explicit array, as in the original call.
   final Object[] logArgs = new Object[] {location, curDir, absoluteLocation};
   log.debug("relToAbsPathForStoreLocation({}, {}) --> {}", logArgs);
   return absoluteLocation;
 }