private void writeIndexDescriptors(ETwinIndexDescriptor ETwinIndexDescriptor) throws IOException { Configuration conf = getConf(); FileSystem fs = (new Path(IndexConfig.index.get()).getFileSystem(conf)); FileStatus[] fileStats = fs.globStatus(new Path(IndexConfig.index.get(), "*")); // We write one indexDescriptor per generated index segment. // Something to consider: right now it's a straight-up serialized Thrift object. // Would it be better to do the LzoBase64Line thing, so that we can apply our tools? // or extend the tools? for (int i = 0; i < fileStats.length; i++) { ETwinIndexDescriptor.setIndexPart(i); FileStatus stat = fileStats[i]; Path idxPath = new Path(stat.getPath().getParent(), "_" + stat.getPath().getName() + ".indexmeta"); FSDataOutputStream os = fs.create(idxPath, true); @SuppressWarnings("unchecked") ThriftWritable<ETwinIndexDescriptor> writable = (ThriftWritable<ETwinIndexDescriptor>) ThriftWritable.newInstance(ETwinIndexDescriptor.getClass()); writable.set(ETwinIndexDescriptor); writable.write(os); os.close(); } }
/** * Implementations that have custom fields to add to the IndexDescriptor should override this, * call super.getIndexDescriptor(), and then populate options field appropriately for each index * file. * * @return ETwinIndexDescriptor * @throws IOException */ public ETwinIndexDescriptor getIndexDescriptor() throws IOException { final ETwinIndexDescriptor desc = new ETwinIndexDescriptor(); HdfsFsWalker walker = new HdfsFsWalker(getConf()); // The silly-looking Exception wrapping is due to F3 not letting us throw // checked exceptions. try { walker.walk( new Path(IndexConfig.index.get()), new F2<Boolean, FileStatus, FileSystem>() { @Override public Boolean eval(FileStatus status, FileSystem fs) { if (!status.isDir()) { try { desc.addToFileIndexDescriptors(buildFileIndexDescriptor(status.getPath())); } catch (IOException e) { LOG.error("unable to buld a FileIndexDescriptor for " + status.getPath(), e); throw new RuntimeException( "unable to buld a FileIndexDescriptor for " + status.getPath(), e); } } return true; } }); } catch (RuntimeException e) { throw new IOException(e); } desc.options = Maps.newHashMap(); try { desc.options.put( Strings.camelize(ReflectionHelper.nameForOption(params, "samplePercentage")), String.valueOf(IndexConfig.samplePercentage.get())); desc.options.put( Strings.camelize(ReflectionHelper.nameForOption(params, "analyzer")), IndexConfig.analyzer.get()); desc.options.put( Strings.camelize(ReflectionHelper.nameForOption(params, "similarity")), IndexConfig.similarity.get()); } catch (SecurityException e) { throw new IOException(e); } catch (NoSuchFieldException e) { throw new IOException(e); } // The idea is that as index parts are written out, they will write in their part #s. desc.setIndexPart(0); return desc; }