/** * Read the schema from json metadata file If isSchemaOn parameter is false, the errors are * suppressed and logged * * @param location * @param job * @param isSchemaOn * @return schema * @throws IOException */ public ResourceSchema getSchema(String location, Job job, boolean isSchemaOn) throws IOException { Configuration conf = job.getConfiguration(); Set<ElementDescriptor> schemaFileSet = null; try { schemaFileSet = findMetaFile(location, schemaFileName, conf); } catch (IOException e) { String msg = "Could not find schema file for " + location; return nullOrException(isSchemaOn, msg, e); } // TODO we assume that all schemas are the same. The question of merging schemas is left open // for now. ElementDescriptor schemaFile = null; if (!schemaFileSet.isEmpty()) { schemaFile = schemaFileSet.iterator().next(); } else { String msg = "Could not find schema file for " + location; return nullOrException(isSchemaOn, msg, null); } log.debug("Found schema file: " + schemaFile.toString()); ResourceSchema resourceSchema = null; try { resourceSchema = new ObjectMapper().readValue(schemaFile.open(), ResourceSchema.class); } catch (JsonParseException e) { String msg = "Unable to load Resource Schema for " + location; return nullOrException(isSchemaOn, msg, e); } catch (JsonMappingException e) { String msg = "Unable to load Resource Schema for " + location; return nullOrException(isSchemaOn, msg, e); } catch (IOException e) { String msg = "Unable to load Resource Schema for " + location; return nullOrException(isSchemaOn, msg, e); } return resourceSchema; }
/**
 * Reads the statistics stored in the JSON stat metadata file of a location.
 *
 * <p>For JsonMetadata stats are considered optional. This method suppresses (and logs) errors if
 * they are encountered and returns {@code null} instead of failing.
 *
 * @param location location whose stat metadata file is looked up
 * @param job job supplying the configuration used for the lookup
 * @return the deserialized statistics, or {@code null} if the stat file cannot be found or read
 * @throws IOException declared by the signature; lookup and read failures seen here are logged,
 *     not rethrown
 * @see org.apache.pig.LoadMetadata#getStatistics(String, Job)
 */
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  Configuration conf = job.getConfiguration();

  Set<ElementDescriptor> statFileSet;
  try {
    statFileSet = findMetaFile(location, statFileName, conf);
  } catch (IOException e) {
    // Keep the cause in the log so a failed lookup can be diagnosed.
    log.warn("Could not find stat file for " + location, e);
    return null;
  }

  if (statFileSet.isEmpty()) {
    log.warn("Could not find stat file for " + location);
    return null;
  }

  // As with schemas, only the first stat file found is used.
  ElementDescriptor statFile = statFileSet.iterator().next();
  log.debug("Found stat file " + statFile.toString());

  try {
    return new ObjectMapper().readValue(statFile.open(), ResourceStatistics.class);
  } catch (IOException e) {
    // Jackson's JsonParseException and JsonMappingException are IOException subclasses, so one
    // handler covers all read failures. The stack trace goes through the logger rather than
    // straight to stderr.
    log.warn("Unable to load Resource Statistics for " + location, e);
    return null;
  }
}
/**
 * Reports whether the given element exists, consulting {@code lookupCache} first.
 *
 * <p>On a cache miss the element itself is queried and the answer is stored in the cache before
 * being returned.
 *
 * @param e element to check
 * @return the cached or freshly determined existence flag
 * @throws IOException if the element's own existence check fails
 */
private boolean exists(ElementDescriptor e) throws IOException {
  if (!lookupCache.containsKey(e)) {
    boolean present = e.exists();
    lookupCache.put(e, present);
    return present;
  }
  return lookupCache.get(e);
}
/**
 * Writes the given statistics as JSON to the stat metadata file of a location.
 *
 * <p>Nothing is written when the stat file already exists or when {@code stats} is {@code null}.
 * JSON generation and mapping errors are logged and suppressed; any other {@link IOException}
 * propagates to the caller.
 *
 * @param stats statistics to serialize; may be {@code null}, in which case nothing is written
 * @param location location under which the stat file is created
 * @param job job supplying the configuration used to build the storage handle
 * @throws IOException if the storage or the file cannot be accessed or written
 */
@Override
public void storeStatistics(ResourceStatistics stats, String location, Job job)
    throws IOException {
  Configuration conf = job.getConfiguration();
  DataStorage storage =
      new HDataStorage(new Path(location).toUri(), ConfigurationUtil.toProperties(conf));
  ElementDescriptor statFilePath = storage.asElement(location, statFileName);

  if (!statFilePath.exists() && stats != null) {
    try {
      new ObjectMapper().writeValue(statFilePath.create(), stats);
    } catch (JsonGenerationException e) {
      // Log the cause through the logger instead of dumping the stack trace to stderr.
      log.warn("Unable to write Resource Statistics for " + location, e);
    } catch (JsonMappingException e) {
      log.warn("Unable to write Resource Statistics for " + location, e);
    }
  }
}
/**
 * Writes the given schema as JSON to the schema metadata file of a location and, when
 * {@code printHeaders} is set, writes a header file listing the field names.
 *
 * <p>The schema file is written only if it does not exist yet and {@code schema} is not
 * {@code null}. JSON generation and mapping errors are logged and suppressed; any other
 * {@link IOException} propagates to the caller.
 *
 * <p>The header file is written only if it does not exist yet. Field names are encoded as UTF-8
 * and separated by {@code fieldDel}; the last name is followed by {@code recordDel}. A field
 * without a name is written as {@code $} followed by its position.
 *
 * @param schema schema to serialize; may be {@code null}, in which case nothing is written
 * @param location location under which the metadata files are created
 * @param job job supplying the configuration used to build the storage handle
 * @throws IOException if the storage or a file cannot be accessed or written
 */
@Override
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
  Configuration conf = job.getConfiguration();
  DataStorage storage =
      new HDataStorage(new Path(location).toUri(), ConfigurationUtil.toProperties(conf));
  ElementDescriptor schemaFilePath = storage.asElement(location, schemaFileName);

  if (!schemaFilePath.exists() && schema != null) {
    try {
      new ObjectMapper().writeValue(schemaFilePath.create(), schema);
    } catch (JsonGenerationException e) {
      // Log the cause through the logger instead of dumping the stack trace to stderr.
      log.warn("Unable to write Resource Schema for " + location, e);
    } catch (JsonMappingException e) {
      log.warn("Unable to write Resource Schema for " + location, e);
    }
  }

  // Without a schema there are no field names to print. Skipping here avoids creating an empty
  // header file and then failing with a NullPointerException on schema.fieldNames().
  if (printHeaders && schema != null) {
    ElementDescriptor headerFilePath = storage.asElement(location, headerFileName);
    if (!headerFilePath.exists()) {
      OutputStream os = headerFilePath.create();
      try {
        String[] names = schema.fieldNames();
        for (int i = 0; i < names.length; i++) {
          // Unnamed fields fall back to their positional reference, e.g. "$0".
          String fn = (names[i] == null) ? ("$" + i) : names[i];
          os.write(fn.getBytes("UTF-8"));
          if (i < names.length - 1) {
            os.write(fieldDel);
          } else {
            os.write(recordDel);
          }
        }
      } finally {
        os.close();
      }
    }
  }
}