Пример #1
0
 /**
  * Writes the given schema as JSON to the schema file for {@code location} and, when header
  * printing is enabled, writes a header file listing the schema's field names.
  *
  * <p>Neither file is overwritten if it already exists, and nothing is written when {@code schema}
  * is {@code null}. JSON generation and mapping failures are logged as warnings and are not
  * rethrown, so a failed schema write does not fail the caller.
  *
  * <p>The header consists of the field names encoded as UTF-8, separated by {@code fieldDel} and
  * terminated by {@code recordDel}. A field without a name is written as {@code $<index>}.
  *
  * @param schema schema to persist; may be {@code null}, in which case no file is written
  * @param location location whose schema and header files are written
  * @param job job supplying the configuration used to reach the storage
  * @throws IOException if the storage cannot be accessed or a file cannot be created or written
  */
 @Override
 public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
   Configuration conf = job.getConfiguration();
   DataStorage storage =
       new HDataStorage(new Path(location).toUri(), ConfigurationUtil.toProperties(conf));
   ElementDescriptor schemaFilePath = storage.asElement(location, schemaFileName);
   if (!schemaFilePath.exists() && schema != null) {
     try {
       // NOTE(review): the stream returned by create() is not closed here; presumably the
       // ObjectMapper closes its target stream after writing - confirm against the Jackson version.
       new ObjectMapper().writeValue(schemaFilePath.create(), schema);
     } catch (JsonGenerationException e) {
       // Best effort: keep the cause in the log instead of dumping it to stderr.
       log.warn("Unable to write Resource Schema for " + location, e);
     } catch (JsonMappingException e) {
       log.warn("Unable to write Resource Schema for " + location, e);
     }
   }
   // Without a schema there are no field names to emit; skipping here avoids creating an empty
   // header file and then failing with a NullPointerException on schema.fieldNames().
   if (printHeaders && schema != null) {
     ElementDescriptor headerFilePath = storage.asElement(location, headerFileName);
     if (!headerFilePath.exists()) {
       OutputStream os = headerFilePath.create();
       try {
         String[] names = schema.fieldNames();
         for (int i = 0; i < names.length; i++) {
           // Unnamed fields fall back to their positional name ($0, $1, ...).
           String fn = (names[i] == null) ? ("$" + i) : names[i];
           os.write(fn.getBytes("UTF-8"));
           if (i < names.length - 1) {
             os.write(fieldDel);
           } else {
             os.write(recordDel);
           }
         }
       } finally {
         os.close();
       }
     }
   }
 }
Пример #2
0
  /**
   * Locates the metadata files that accompany the data referenced by {@code path}.
   *
   * <p>{@code path} is split into individual locations with {@code LoadFunc.getPathStrings}. Each
   * location is then handled as follows:
   *
   * <ul>
   *   <li>If the location is a container, the {@code metaname} element inside it is used when it
   *       exists.
   *   <li>Otherwise the location is expanded into its elements. For a file, the {@code metaname}
   *       element of the file's parent directory is considered; for a directory, the {@code
   *       metaname} element inside that directory is considered.
   * </ul>
   *
   * <p>Only metadata files that exist are returned. Resolving conflicts between them or merging
   * their contents is left to the caller.
   *
   * @param path path as passed in to a LoadFunc (may be a glob, a directory, a file, or a
   *     comma-separated list of these)
   * @param metaname metadata file designation, such as .pig_schema or .pig_stats
   * @param conf configuration object used to reach the storage
   * @return set of element descriptors for all existing metadata files associated with the path
   * @throws IOException if the storage cannot be queried
   */
  protected Set<ElementDescriptor> findMetaFile(String path, String metaname, Configuration conf)
      throws IOException {
    Set<ElementDescriptor> found = new HashSet<ElementDescriptor>();

    for (String location : LoadFunc.getPathStrings(path)) {
      DataStorage storage =
          new HDataStorage(new Path(location).toUri(), ConfigurationUtil.toProperties(conf));
      String resolved = FileLocalizer.fullPath(location, storage);

      // A plain directory: look for the metadata file directly inside it.
      if (storage.isContainer(resolved)) {
        ElementDescriptor candidate = storage.asElement(resolved, metaname);
        if (exists(candidate)) {
          found.add(candidate);
        }
        continue;
      }

      // Anything else is expanded into its elements, each checked individually.
      for (ElementDescriptor entry : storage.asCollection(location)) {
        // Files are represented by their parent directory; any other entry is treated as a
        // directory itself.
        ContainerDescriptor owner =
            (entry instanceof HFile)
                ? new HDirectory((HDataStorage) storage, ((HPath) entry).getPath().getParent())
                : (HDirectory) entry;

        ElementDescriptor candidate = storage.asElement(owner, metaname);
        if (exists(candidate)) {
          found.add(candidate);
        }
      }
    }

    return found;
  }
Пример #3
0
 /**
  * Writes the given statistics as JSON to the statistics file for {@code location}.
  *
  * <p>An existing statistics file is never overwritten, and nothing is written when {@code stats}
  * is {@code null}. JSON generation and mapping failures are logged as warnings and are not
  * rethrown, so a failed statistics write does not fail the caller.
  *
  * @param stats statistics to persist; may be {@code null}, in which case no file is written
  * @param location location whose statistics file is written
  * @param job job supplying the configuration used to reach the storage
  * @throws IOException if the storage cannot be accessed or the file cannot be created or written
  */
 @Override
 public void storeStatistics(ResourceStatistics stats, String location, Job job)
     throws IOException {
   Configuration conf = job.getConfiguration();
   DataStorage storage =
       new HDataStorage(new Path(location).toUri(), ConfigurationUtil.toProperties(conf));
   ElementDescriptor statFilePath = storage.asElement(location, statFileName);
   if (!statFilePath.exists() && stats != null) {
     try {
       // NOTE(review): the stream returned by create() is not closed here; presumably the
       // ObjectMapper closes its target stream after writing - confirm against the Jackson version.
       new ObjectMapper().writeValue(statFilePath.create(), stats);
     } catch (JsonGenerationException e) {
       // Best effort: keep the cause in the log instead of dumping it to stderr.
       log.warn("Unable to write Resource Statistics for " + location, e);
     } catch (JsonMappingException e) {
       log.warn("Unable to write Resource Statistics for " + location, e);
     }
   }
 }