@Override
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    DataStorage storage = new HDataStorage(new Path(location).toUri(),
            ConfigurationUtil.toProperties(conf));
    ElementDescriptor schemaFilePath = storage.asElement(location, schemaFileName);
    if (!schemaFilePath.exists() && schema != null) {
        try {
            new ObjectMapper().writeValue(schemaFilePath.create(), schema);
        } catch (JsonGenerationException e) {
            log.warn("Unable to write Resource Schema for " + location, e);
        } catch (JsonMappingException e) {
            log.warn("Unable to write Resource Schema for " + location, e);
        }
    }
    // writing the header requires a schema to pull field names from
    if (printHeaders && schema != null) {
        ElementDescriptor headerFilePath = storage.asElement(location, headerFileName);
        if (!headerFilePath.exists()) {
            OutputStream os = headerFilePath.create();
            try {
                String[] names = schema.fieldNames();
                for (int i = 0; i < names.length; i++) {
                    // fall back to a positional name ($i) for unnamed fields
                    String fn = (names[i] == null) ? ("$" + i) : names[i];
                    os.write(fn.getBytes("UTF-8"));
                    if (i < names.length - 1) {
                        os.write(fieldDel);
                    } else {
                        os.write(recordDel);
                    }
                }
            } finally {
                os.close();
            }
        }
    }
}
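/*
 * A minimal sketch (not part of the class) of the round trip this method
 * enables: once storeSchema has run for "location", a loader can deserialize
 * the JSON side file back into a ResourceSchema with the same Jackson
 * ObjectMapper. This assumes ElementDescriptor.open() streams the file
 * contents; the local variable names are hypothetical:
 *
 *   ElementDescriptor schemaFilePath = storage.asElement(location, schemaFileName);
 *   if (schemaFilePath.exists()) {
 *       ResourceSchema schema =
 *           new ObjectMapper().readValue(schemaFilePath.open(), ResourceSchema.class);
 *   }
 */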
/**
 * Given a path, which may represent a glob pattern, a directory,
 * comma-separated files/glob patterns, or a file, this method finds the set
 * of relevant metadata files on the storage system. The algorithm for finding
 * the metadata file is as follows:
 *
 * <p>For each object represented by the path (either directly, or via a
 * glob): if the object is a directory and path/metaname exists, use that as
 * the metadata file; else if parentPath/metaname exists, use that as the
 * metadata file.
 *
 * <p>Resolving conflicts, merging the metadata, etc., is not handled by this
 * method and should be taken care of by downstream code.
 *
 * @param path Path, as passed in to a LoadFunc (may be a Hadoop glob)
 * @param metaname Metadata file designation, such as .pig_schema or .pig_stats
 * @param conf configuration object
 * @return Set of element descriptors for all metadata files associated with
 *         the files on the path.
 */
protected Set<ElementDescriptor> findMetaFile(String path, String metaname, Configuration conf)
        throws IOException {
    Set<ElementDescriptor> metaFileSet = new HashSet<ElementDescriptor>();
    String[] locations = LoadFunc.getPathStrings(path);
    for (String loc : locations) {
        DataStorage storage = new HDataStorage(new Path(loc).toUri(),
                ConfigurationUtil.toProperties(conf));
        String fullPath = FileLocalizer.fullPath(loc, storage);
        if (storage.isContainer(fullPath)) {
            // the location is a directory: look for metaname directly inside it
            ElementDescriptor metaFilePath = storage.asElement(fullPath, metaname);
            if (exists(metaFilePath)) {
                metaFileSet.add(metaFilePath);
            }
        } else {
            // the location is a file or glob: look for metaname in each
            // matched element's parent directory
            ElementDescriptor[] descriptors = storage.asCollection(loc);
            for (ElementDescriptor descriptor : descriptors) {
                ContainerDescriptor container;
                if (descriptor instanceof HFile) {
                    Path parent = ((HPath) descriptor).getPath().getParent();
                    container = new HDirectory((HDataStorage) storage, parent);
                } else { // descriptor instanceof HDirectory
                    container = (HDirectory) descriptor;
                }
                ElementDescriptor metaFilePath = storage.asElement(container, metaname);
                if (exists(metaFilePath)) {
                    metaFileSet.add(metaFilePath);
                }
            }
        }
    }
    return metaFileSet;
}
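/*
 * Sketch of how a LoadMetadata implementation might consume findMetaFile
 * (assumptions: ".pig_schema" as the metaname, and ElementDescriptor.open()
 * streaming the file contents). As the Javadoc notes, reconciling multiple
 * matches is the caller's job; this sketch simply takes the first one:
 *
 *   Set<ElementDescriptor> schemaFiles = findMetaFile(location, ".pig_schema", conf);
 *   for (ElementDescriptor schemaFile : schemaFiles) {
 *       return new ObjectMapper().readValue(schemaFile.open(), ResourceSchema.class);
 *   }
 *   return null; // no metadata file found
 */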
@Override
public void storeStatistics(ResourceStatistics stats, String location, Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    DataStorage storage = new HDataStorage(new Path(location).toUri(),
            ConfigurationUtil.toProperties(conf));
    ElementDescriptor statFilePath = storage.asElement(location, statFileName);
    if (!statFilePath.exists() && stats != null) {
        try {
            new ObjectMapper().writeValue(statFilePath.create(), stats);
        } catch (JsonGenerationException e) {
            log.warn("Unable to write Resource Statistics for " + location, e);
        } catch (JsonMappingException e) {
            log.warn("Unable to write Resource Statistics for " + location, e);
        }
    }
}
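/*
 * The statistics side file can be read back symmetrically; a hedged sketch,
 * reusing findMetaFile above and again assuming ElementDescriptor.open():
 *
 *   Set<ElementDescriptor> statFiles = findMetaFile(location, statFileName, conf);
 *   for (ElementDescriptor statFile : statFiles) {
 *       return new ObjectMapper().readValue(statFile.open(), ResourceStatistics.class);
 *   }
 *   return null; // no statistics were stored
 */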