Ejemplo n.º 1
0
 /**
  * Appends one {@code (id, vector)} record to the sequence file writer registered for
  * {@code part}.
  *
  * <p>Writers are looked up in the {@code writers} map. When none is registered for
  * {@code part} yet, one is opened via {@code IOUtils.forSequenceWrite} and stored in the map
  * so later calls with the same part reuse it.
  *
  * @param part the key identifying which writer receives the record
  * @param id the integer key of the record
  * @param vector the value of the record
  * @throws IOException if opening the writer or appending the record fails
  */
 private void write(Object part, int id, Vector vector) throws IOException {
   SequenceFile.Writer partWriter = writers.get(part);
   if (partWriter == null) {
     // No writer for this part so far: open one and remember it for subsequent records.
     Configuration jobConf = UDFContext.getUDFContext().getJobConf();
     Path target =
         PathUtils.enter(getStorePath(), String.valueOf(part), "part-" + Env.getPartID());
     partWriter =
         IOUtils.forSequenceWrite(jobConf, target, IntWritable.class, VectorWritable.class);
     writers.put(part, partWriter);
   }

   // The key/value writables are shared fields that are re-populated for each record.
   keyWritable.set(id);
   valueWritable.set(vector);
   partWriter.append(keyWritable, valueWritable);
 }
Ejemplo n.º 2
0
 /**
  * Creates the store on top of a {@code NullOutputFormat} and registers a procedure with
  * {@code Env.inBackground} that initialises the per-instance state: the vector generator
  * (built from the {@code "vector.type"} property), the writer map, and the reusable key/value
  * writables.
  *
  * @throws IOException propagated from the superclass constructor or {@code Env.inBackground}
  */
 public GroupedVectorStore() throws IOException {
   super(new NullOutputFormat());
   Env.BackgroundProcedure initState =
       new Env.BackgroundProcedure() {
         @Override
         public void execute(Configuration conf) throws IOException {
           // Vector generator is derived from the "vector.type" property of this class.
           tvgen =
               VectorUtils.createVectorGenerator(
                   Env.getProperty(GroupedVectorStore.class, "vector.type"));
           // Writers keyed by part; starts out empty.
           writers = new HashMap<Object, SequenceFile.Writer>();
           // Reusable writables for the record key and value.
           keyWritable = new IntWritable();
           valueWritable = new VectorWritable();
         }
       };
   Env.inBackground(initState);
 }
Ejemplo n.º 3
0
 /**
  * Checks the shape of the incoming schema and records the vector type as the
  * {@code "vector.type"} property of this class.
  *
  * <p>Expected layout: field 0 is the part name, field 1 is a bag whose tuples are
  * {@code (id, vector)}, with an integer id and the vector itself given as a tuple.
  *
  * @param schema the schema to check
  * @throws IOException declared by the overridden method
  */
 @Override
 public void checkSchema(ResourceSchema schema) throws IOException {
   // Field 0: the part name (usually will be int, but generally accept any type).
   SchemaUtils.claim(schema, 0, DataType.INTEGER, DataType.CHARARRAY, DataType.UNKNOWN);

   // Field 1: the bag.
   ResourceSchema.ResourceFieldSchema bagField = SchemaUtils.claim(schema, 1, DataType.BAG);

   // Inside the bag: the tuple of (id, vector).
   ResourceSchema.ResourceFieldSchema entryField = SchemaUtils.claim(bagField, 0, DataType.TUPLE);

   // Entry position 0: the id.
   SchemaUtils.claim(entryField, 0, DataType.INTEGER);

   // Entry position 1: the vector.
   ResourceSchema.ResourceFieldSchema vectorField =
       SchemaUtils.claim(entryField, 1, DataType.TUPLE);

   Env.setProperty(
       GroupedVectorStore.class,
       "vector.type",
       VectorUtils.typeOfVector(vectorField.getSchema()));
 }